tcg_gen_ext16s_i32(dest, var);
}
-/* 32x32->64 multiply. Marks inputs as dead. */
-static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 lo = tcg_temp_new_i32();
- TCGv_i32 hi = tcg_temp_new_i32();
- TCGv_i64 ret;
-
- tcg_gen_mulu2_i32(lo, hi, a, b);
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
-
- ret = tcg_temp_new_i64();
- tcg_gen_concat_i32_i64(ret, lo, hi);
- tcg_temp_free_i32(lo);
- tcg_temp_free_i32(hi);
-
- return ret;
-}
-
-static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 lo = tcg_temp_new_i32();
- TCGv_i32 hi = tcg_temp_new_i32();
- TCGv_i64 ret;
-
- tcg_gen_muls2_i32(lo, hi, a, b);
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
-
- ret = tcg_temp_new_i64();
- tcg_gen_concat_i32_i64(ret, lo, hi);
- tcg_temp_free_i32(lo);
- tcg_temp_free_i32(hi);
-
- return ret;
-}
-
/* Swap low and high halfwords. */
-static void gen_swap_half(TCGv_i32 var)
+static void gen_swap_half(TCGv_i32 dest, TCGv_i32 var)
{
- tcg_gen_rotri_i32(var, var, 16);
+ tcg_gen_rotri_i32(dest, var, 16);
}
/* Dual 16-bit add. Result placed in t0 and t1 is marked as dead.
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
-static void gen_neon_dup_low16(TCGv_i32 var)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ext16u_i32(var, var);
- tcg_gen_shli_i32(tmp, var, 16);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
-}
-
-static void gen_neon_dup_high16(TCGv_i32 var)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_andi_i32(var, var, 0xffff0000);
- tcg_gen_shri_i32(tmp, var, 16);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
-}
-
static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
{
#ifndef CONFIG_USER_ONLY
gen_rfe(s, pc, load_cpu_field(spsr));
}
-#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
-
-static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
-{
- switch (size) {
- case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
- case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
- case 2: tcg_gen_add_i32(t0, t0, t1); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
-{
- switch (size) {
- case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
- case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
- case 2: tcg_gen_sub_i32(t0, t1, t0); break;
- default: return;
- }
-}
-
-/* 32-bit pairwise ops end up the same as the elementwise versions. */
-#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
-#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
-#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
-#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
-
-#define GEN_NEON_INTEGER_OP_ENV(name) do { \
- switch ((size << 1) | u) { \
- case 0: \
- gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 1: \
- gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 2: \
- gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 3: \
- gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 4: \
- gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 5: \
- gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
- break; \
- default: return 1; \
- }} while (0)
-
-#define GEN_NEON_INTEGER_OP(name) do { \
- switch ((size << 1) | u) { \
- case 0: \
- gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
- break; \
- case 1: \
- gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
- break; \
- case 2: \
- gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
- break; \
- case 3: \
- gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
- break; \
- case 4: \
- gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
- break; \
- case 5: \
- gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
- break; \
- default: return 1; \
- }} while (0)
-
-static TCGv_i32 neon_load_scratch(int scratch)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
- return tmp;
-}
-
-static void neon_store_scratch(int scratch, TCGv_i32 var)
-{
- tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
- tcg_temp_free_i32(var);
-}
-
-static inline TCGv_i32 neon_get_scalar(int size, int reg)
-{
- TCGv_i32 tmp;
- if (size == 1) {
- tmp = neon_load_reg(reg & 7, reg >> 4);
- if (reg & 8) {
- gen_neon_dup_high16(tmp);
- } else {
- gen_neon_dup_low16(tmp);
- }
- } else {
- tmp = neon_load_reg(reg & 15, reg >> 4);
- }
- return tmp;
-}
-
-static int gen_neon_unzip(int rd, int rm, int size, int q)
-{
- TCGv_ptr pd, pm;
-
- if (!q && size == 2) {
- return 1;
- }
- pd = vfp_reg_ptr(true, rd);
- pm = vfp_reg_ptr(true, rm);
- if (q) {
- switch (size) {
- case 0:
- gen_helper_neon_qunzip8(pd, pm);
- break;
- case 1:
- gen_helper_neon_qunzip16(pd, pm);
- break;
- case 2:
- gen_helper_neon_qunzip32(pd, pm);
- break;
- default:
- abort();
- }
- } else {
- switch (size) {
- case 0:
- gen_helper_neon_unzip8(pd, pm);
- break;
- case 1:
- gen_helper_neon_unzip16(pd, pm);
- break;
- default:
- abort();
- }
- }
- tcg_temp_free_ptr(pd);
- tcg_temp_free_ptr(pm);
- return 0;
-}
-
-static int gen_neon_zip(int rd, int rm, int size, int q)
-{
- TCGv_ptr pd, pm;
-
- if (!q && size == 2) {
- return 1;
- }
- pd = vfp_reg_ptr(true, rd);
- pm = vfp_reg_ptr(true, rm);
- if (q) {
- switch (size) {
- case 0:
- gen_helper_neon_qzip8(pd, pm);
- break;
- case 1:
- gen_helper_neon_qzip16(pd, pm);
- break;
- case 2:
- gen_helper_neon_qzip32(pd, pm);
- break;
- default:
- abort();
- }
- } else {
- switch (size) {
- case 0:
- gen_helper_neon_zip8(pd, pm);
- break;
- case 1:
- gen_helper_neon_zip16(pd, pm);
- break;
- default:
- abort();
- }
- }
- tcg_temp_free_ptr(pd);
- tcg_temp_free_ptr(pm);
- return 0;
-}
-
static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
{
TCGv_i32 rd, tmp;
tcg_temp_free_i32(rd);
}
-static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
-{
- switch (size) {
- case 0: gen_helper_neon_narrow_u8(dest, src); break;
- case 1: gen_helper_neon_narrow_u16(dest, src); break;
- case 2: tcg_gen_extrl_i64_i32(dest, src); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
-{
- switch (size) {
- case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
- case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
- case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
-{
- switch (size) {
- case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
- case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
- case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
-{
- switch (size) {
- case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
- case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
- case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
- int q, int u)
-{
- if (q) {
- if (u) {
- switch (size) {
- case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
- case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
- default: abort();
- }
- } else {
- switch (size) {
- case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
- case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
- default: abort();
- }
- }
- } else {
- if (u) {
- switch (size) {
- case 1: gen_helper_neon_shl_u16(var, var, shift); break;
- case 2: gen_ushl_i32(var, var, shift); break;
- default: abort();
- }
- } else {
- switch (size) {
- case 1: gen_helper_neon_shl_s16(var, var, shift); break;
- case 2: gen_sshl_i32(var, var, shift); break;
- default: abort();
- }
- }
- }
-}
-
-static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
-{
- if (u) {
- switch (size) {
- case 0: gen_helper_neon_widen_u8(dest, src); break;
- case 1: gen_helper_neon_widen_u16(dest, src); break;
- case 2: tcg_gen_extu_i32_i64(dest, src); break;
- default: abort();
- }
- } else {
- switch (size) {
- case 0: gen_helper_neon_widen_s8(dest, src); break;
- case 1: gen_helper_neon_widen_s16(dest, src); break;
- case 2: tcg_gen_ext_i32_i64(dest, src); break;
- default: abort();
- }
- }
- tcg_temp_free_i32(src);
-}
-
-static inline void gen_neon_addl(int size)
-{
- switch (size) {
- case 0: gen_helper_neon_addl_u16(CPU_V001); break;
- case 1: gen_helper_neon_addl_u32(CPU_V001); break;
- case 2: tcg_gen_add_i64(CPU_V001); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_subl(int size)
-{
- switch (size) {
- case 0: gen_helper_neon_subl_u16(CPU_V001); break;
- case 1: gen_helper_neon_subl_u32(CPU_V001); break;
- case 2: tcg_gen_sub_i64(CPU_V001); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_negl(TCGv_i64 var, int size)
-{
- switch (size) {
- case 0: gen_helper_neon_negl_u16(var, var); break;
- case 1: gen_helper_neon_negl_u32(var, var); break;
- case 2:
- tcg_gen_neg_i64(var, var);
- break;
- default: abort();
- }
-}
-
-static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
-{
- switch (size) {
- case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
- case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
- int size, int u)
-{
- TCGv_i64 tmp;
-
- switch ((size << 1) | u) {
- case 0: gen_helper_neon_mull_s8(dest, a, b); break;
- case 1: gen_helper_neon_mull_u8(dest, a, b); break;
- case 2: gen_helper_neon_mull_s16(dest, a, b); break;
- case 3: gen_helper_neon_mull_u16(dest, a, b); break;
- case 4:
- tmp = gen_muls_i64_i32(a, b);
- tcg_gen_mov_i64(dest, tmp);
- tcg_temp_free_i64(tmp);
- break;
- case 5:
- tmp = gen_mulu_i64_i32(a, b);
- tcg_gen_mov_i64(dest, tmp);
- tcg_temp_free_i64(tmp);
- break;
- default: abort();
- }
-
- /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
- Don't forget to clean them now. */
- if (size < 2) {
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
- }
-}
-
-static void gen_neon_narrow_op(int op, int u, int size,
- TCGv_i32 dest, TCGv_i64 src)
-{
- if (op) {
- if (u) {
- gen_neon_unarrow_sats(size, dest, src);
- } else {
- gen_neon_narrow(size, dest, src);
- }
- } else {
- if (u) {
- gen_neon_narrow_satu(size, dest, src);
- } else {
- gen_neon_narrow_sats(size, dest, src);
- }
- }
-}
-
-/* Symbolic constants for op fields for Neon 3-register same-length.
- * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
- * table A7-9.
- */
-#define NEON_3R_VHADD 0
-#define NEON_3R_VQADD 1
-#define NEON_3R_VRHADD 2
-#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
-#define NEON_3R_VHSUB 4
-#define NEON_3R_VQSUB 5
-#define NEON_3R_VCGT 6
-#define NEON_3R_VCGE 7
-#define NEON_3R_VSHL 8
-#define NEON_3R_VQSHL 9
-#define NEON_3R_VRSHL 10
-#define NEON_3R_VQRSHL 11
-#define NEON_3R_VMAX 12
-#define NEON_3R_VMIN 13
-#define NEON_3R_VABD 14
-#define NEON_3R_VABA 15
-#define NEON_3R_VADD_VSUB 16
-#define NEON_3R_VTST_VCEQ 17
-#define NEON_3R_VML 18 /* VMLA, VMLS */
-#define NEON_3R_VMUL 19
-#define NEON_3R_VPMAX 20
-#define NEON_3R_VPMIN 21
-#define NEON_3R_VQDMULH_VQRDMULH 22
-#define NEON_3R_VPADD_VQRDMLAH 23
-#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
-#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
-#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
-#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
-#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
-#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
-#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
-#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
-
-static const uint8_t neon_3r_sizes[] = {
- [NEON_3R_VHADD] = 0x7,
- [NEON_3R_VQADD] = 0xf,
- [NEON_3R_VRHADD] = 0x7,
- [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
- [NEON_3R_VHSUB] = 0x7,
- [NEON_3R_VQSUB] = 0xf,
- [NEON_3R_VCGT] = 0x7,
- [NEON_3R_VCGE] = 0x7,
- [NEON_3R_VSHL] = 0xf,
- [NEON_3R_VQSHL] = 0xf,
- [NEON_3R_VRSHL] = 0xf,
- [NEON_3R_VQRSHL] = 0xf,
- [NEON_3R_VMAX] = 0x7,
- [NEON_3R_VMIN] = 0x7,
- [NEON_3R_VABD] = 0x7,
- [NEON_3R_VABA] = 0x7,
- [NEON_3R_VADD_VSUB] = 0xf,
- [NEON_3R_VTST_VCEQ] = 0x7,
- [NEON_3R_VML] = 0x7,
- [NEON_3R_VMUL] = 0x7,
- [NEON_3R_VPMAX] = 0x7,
- [NEON_3R_VPMIN] = 0x7,
- [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
- [NEON_3R_VPADD_VQRDMLAH] = 0x7,
- [NEON_3R_SHA] = 0xf, /* size field encodes op type */
- [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
- [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
-};
-
/* Symbolic constants for op fields for Neon 2-register miscellaneous.
* The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
* table A7-13.
[NEON_2RM_VCVT_UF] = 0x4,
};
+static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz,
+ gen_helper_gvec_3_ptr *fn)
+{
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
+ opr_sz, max_sz, 0, fn);
+ tcg_temp_free_ptr(qc_ptr);
+}
+
void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
};
tcg_debug_assert(vece >= 1 && vece <= 2);
- tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
- opr_sz, max_sz, 0, fns[vece - 1]);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}
void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
};
tcg_debug_assert(vece >= 1 && vece <= 2);
- tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
- opr_sz, max_sz, 0, fns[vece - 1]);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}
#define GEN_CMP0(NAME, COND) \
rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
-/* Translate a NEON data processing instruction. Return nonzero if the
- instruction is invalid.
- We process data in a mixture of 32-bit and 64-bit chunks.
- Mostly we use 32-bit chunks so we can use normal scalar instructions. */
-
-static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
+static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
- int op;
- int q;
- int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
- int size;
- int shift;
- int pass;
- int count;
- int pairwise;
- int u;
- int vec_size;
- uint32_t imm;
- TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
- TCGv_ptr ptr1, ptr2, ptr3;
- TCGv_i64 tmp64;
+ TCGv_i32 t = tcg_temp_new_i32();
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return 1;
- }
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
+ tcg_temp_free_i32(t);
+}
- /* FIXME: this access check should not take precedence over UNDEF
- * for invalid encodings; we will generate incorrect syndrome information
- * for attempts to execute invalid vfp/neon encodings with FP disabled.
- */
- if (s->fp_excp_el) {
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
- syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
+static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
- if (!s->vfp_enabled)
- return 1;
- q = (insn & (1 << 6)) != 0;
- u = (insn >> 24) & 1;
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- VFP_DREG_M(rm, insn);
- size = (insn >> 20) & 3;
- vec_size = q ? 16 : 8;
- rd_ofs = neon_reg_offset(rd, 0);
- rn_ofs = neon_reg_offset(rn, 0);
- rm_ofs = neon_reg_offset(rm, 0);
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
+ tcg_temp_free_i64(t);
+}
- if ((insn & (1 << 23)) == 0) {
- /* Three register same length. */
- op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
- /* Catch invalid op and bad size combinations: UNDEF */
- if ((neon_3r_sizes[op] & (1 << size)) == 0) {
- return 1;
- }
- /* All insns of this form UNDEF for either this condition or the
- * superset of cases "Q==1"; we catch the latter later.
- */
- if (q && ((rd | rn | rm) & 1)) {
- return 1;
- }
- switch (op) {
- case NEON_3R_SHA:
- /* The SHA-1/SHA-256 3-register instructions require special
- * treatment here, as their size field is overloaded as an
- * op type selector, and they all consume their input in a
- * single pass.
- */
- if (!q) {
- return 1;
- }
- if (!u) { /* SHA-1 */
- if (!dc_isar_feature(aa32_sha1, s)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rn);
- ptr3 = vfp_reg_ptr(true, rm);
- tmp4 = tcg_const_i32(size);
- gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
- tcg_temp_free_i32(tmp4);
- } else { /* SHA-256 */
- if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rn);
- ptr3 = vfp_reg_ptr(true, rm);
- switch (size) {
- case 0:
- gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
- break;
- case 1:
- gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
- break;
- case 2:
- gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
- break;
- }
- }
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- tcg_temp_free_ptr(ptr3);
- return 0;
+static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
- case NEON_3R_VPADD_VQRDMLAH:
- if (!u) {
- break; /* VPADD */
- }
- /* VQRDMLAH */
- if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
- gen_gvec_sqrdmlah_qc(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- return 0;
- }
- return 1;
-
- case NEON_3R_VFM_VQRDMLSH:
- if (!u) {
- /* VFM, VFMS */
- if (size == 1) {
- return 1;
- }
- break;
- }
- /* VQRDMLSH */
- if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
- gen_gvec_sqrdmlsh_qc(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- return 0;
- }
- return 1;
-
- case NEON_3R_VADD_VSUB:
- case NEON_3R_LOGIC:
- case NEON_3R_VMAX:
- case NEON_3R_VMIN:
- case NEON_3R_VTST_VCEQ:
- case NEON_3R_VCGT:
- case NEON_3R_VCGE:
- case NEON_3R_VQADD:
- case NEON_3R_VQSUB:
- case NEON_3R_VMUL:
- case NEON_3R_VML:
- case NEON_3R_VSHL:
- /* Already handled by decodetree */
- return 1;
- }
-
- if (size == 3) {
- /* 64-bit element instructions. */
- for (pass = 0; pass < (q ? 2 : 1); pass++) {
- neon_load_reg64(cpu_V0, rn + pass);
- neon_load_reg64(cpu_V1, rm + pass);
- switch (op) {
- case NEON_3R_VQSHL:
- if (u) {
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- }
- break;
- case NEON_3R_VRSHL:
- if (u) {
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
- }
- break;
- case NEON_3R_VQRSHL:
- if (u) {
- gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- }
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- return 0;
- }
- pairwise = 0;
- switch (op) {
- case NEON_3R_VQSHL:
- case NEON_3R_VRSHL:
- case NEON_3R_VQRSHL:
- {
- int rtmp;
- /* Shift instruction operands are reversed. */
- rtmp = rn;
- rn = rm;
- rm = rtmp;
- }
- break;
- case NEON_3R_VPADD_VQRDMLAH:
- case NEON_3R_VPMAX:
- case NEON_3R_VPMIN:
- pairwise = 1;
- break;
- case NEON_3R_FLOAT_ARITH:
- pairwise = (u && size < 2); /* if VPADD (float) */
- break;
- case NEON_3R_FLOAT_MINMAX:
- pairwise = u; /* if VPMIN/VPMAX (float) */
- break;
- case NEON_3R_FLOAT_CMP:
- if (!u && size) {
- /* no encoding for U=0 C=1x */
- return 1;
- }
- break;
- case NEON_3R_FLOAT_ACMP:
- if (!u) {
- return 1;
- }
- break;
- case NEON_3R_FLOAT_MISC:
- /* VMAXNM/VMINNM in ARMv8 */
- if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
- return 1;
- }
- break;
- case NEON_3R_VFM_VQRDMLSH:
- if (!dc_isar_feature(aa32_simdfmac, s)) {
- return 1;
- }
- break;
- default:
- break;
- }
-
- if (pairwise && q) {
- /* All the pairwise insns UNDEF if Q is set */
- return 1;
- }
-
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
-
- if (pairwise) {
- /* Pairwise. */
- if (pass < 1) {
- tmp = neon_load_reg(rn, 0);
- tmp2 = neon_load_reg(rn, 1);
- } else {
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- }
- } else {
- /* Elementwise. */
- tmp = neon_load_reg(rn, pass);
- tmp2 = neon_load_reg(rm, pass);
- }
- switch (op) {
- case NEON_3R_VHADD:
- GEN_NEON_INTEGER_OP(hadd);
- break;
- case NEON_3R_VRHADD:
- GEN_NEON_INTEGER_OP(rhadd);
- break;
- case NEON_3R_VHSUB:
- GEN_NEON_INTEGER_OP(hsub);
- break;
- case NEON_3R_VQSHL:
- GEN_NEON_INTEGER_OP_ENV(qshl);
- break;
- case NEON_3R_VRSHL:
- GEN_NEON_INTEGER_OP(rshl);
- break;
- case NEON_3R_VQRSHL:
- GEN_NEON_INTEGER_OP_ENV(qrshl);
- break;
- case NEON_3R_VABD:
- GEN_NEON_INTEGER_OP(abd);
- break;
- case NEON_3R_VABA:
- GEN_NEON_INTEGER_OP(abd);
- tcg_temp_free_i32(tmp2);
- tmp2 = neon_load_reg(rd, pass);
- gen_neon_add(size, tmp, tmp2);
- break;
- case NEON_3R_VPMAX:
- GEN_NEON_INTEGER_OP(pmax);
- break;
- case NEON_3R_VPMIN:
- GEN_NEON_INTEGER_OP(pmin);
- break;
- case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
- if (!u) { /* VQDMULH */
- switch (size) {
- case 1:
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
- break;
- default: abort();
- }
- } else { /* VQRDMULH */
- switch (size) {
- case 1:
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
- break;
- default: abort();
- }
- }
- break;
- case NEON_3R_VPADD_VQRDMLAH:
- switch (size) {
- case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- break;
- case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- switch ((u << 2) | size) {
- case 0: /* VADD */
- case 4: /* VPADD */
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- break;
- case 2: /* VSUB */
- gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
- break;
- case 6: /* VABD */
- gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
- break;
- default:
- abort();
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MULTIPLY:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
- if (!u) {
- tcg_temp_free_i32(tmp2);
- tmp2 = neon_load_reg(rd, pass);
- if (size == 0) {
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
- }
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_CMP:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (!u) {
- gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- if (size == 0) {
- gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
- }
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_ACMP:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MINMAX:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MISC:
- if (u) {
- /* VMAXNM/VMINNM */
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- } else {
- if (size == 0) {
- gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
- } else {
- gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
- }
- }
- break;
- case NEON_3R_VFM_VQRDMLSH:
- {
- /* VFMA, VFMS: fused multiply-add */
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- TCGv_i32 tmp3 = neon_load_reg(rd, pass);
- if (size) {
- /* VFMS */
- gen_helper_vfp_negs(tmp, tmp);
- }
- gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
- tcg_temp_free_i32(tmp3);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- default:
- abort();
- }
- tcg_temp_free_i32(tmp2);
-
- /* Save the result. For elementwise operations we can put it
- straight into the destination register. For pairwise operations
- we have to be careful to avoid clobbering the source operands. */
- if (pairwise && rd == rm) {
- neon_store_scratch(pass, tmp);
- } else {
- neon_store_reg(rd, pass, tmp);
- }
-
- } /* for pass */
- if (pairwise && rd == rm) {
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- tmp = neon_load_scratch(pass);
- neon_store_reg(rd, pass, tmp);
- }
- }
- /* End of 3 register same size operations. */
- } else if (insn & (1 << 4)) {
- if ((insn & 0x00380080) != 0) {
- /* Two registers and shift. */
- op = (insn >> 8) & 0xf;
- if (insn & (1 << 7)) {
- /* 64-bit shift. */
- if (op > 7) {
- return 1;
- }
- size = 3;
- } else {
- size = 2;
- while ((insn & (1 << (size + 19))) == 0)
- size--;
- }
- shift = (insn >> 16) & ((1 << (3 + size)) - 1);
- if (op < 8) {
- /* Shift by immediate:
- VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
- if (q && ((rd | rm) & 1)) {
- return 1;
- }
- if (!u && (op == 4 || op == 6)) {
- return 1;
- }
- /* Right shifts are encoded as N - shift, where N is the
- element size in bits. */
- if (op <= 4) {
- shift = shift - (1 << (size + 3));
- }
-
- switch (op) {
- case 0: /* VSHR */
- /* Right shift comes here negative. */
- shift = -shift;
- /* Shifts larger than the element size are architecturally
- * valid. Unsigned results in all zeros; signed results
- * in all sign bits.
- */
- if (!u) {
- tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
- MIN(shift, (8 << size) - 1),
- vec_size, vec_size);
- } else if (shift >= 8 << size) {
- tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
- vec_size, 0);
- } else {
- tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- }
- return 0;
-
- case 1: /* VSRA */
- /* Right shift comes here negative. */
- shift = -shift;
- if (u) {
- gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- } else {
- gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- }
- return 0;
-
- case 2: /* VRSHR */
- /* Right shift comes here negative. */
- shift = -shift;
- if (u) {
- gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- } else {
- gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- }
- return 0;
-
- case 3: /* VRSRA */
- /* Right shift comes here negative. */
- shift = -shift;
- if (u) {
- gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- } else {
- gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- }
- return 0;
-
- case 4: /* VSRI */
- if (!u) {
- return 1;
- }
- /* Right shift comes here negative. */
- shift = -shift;
- gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- return 0;
-
- case 5: /* VSHL, VSLI */
- if (u) { /* VSLI */
- gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- } else { /* VSHL */
- tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- }
- return 0;
- }
-
- if (size == 3) {
- count = q + 1;
- } else {
- count = q ? 4: 2;
- }
-
- /* To avoid excessive duplication of ops we implement shift
- * by immediate using the variable shift operations.
- */
- imm = dup_const(size, shift);
-
- for (pass = 0; pass < count; pass++) {
- if (size == 3) {
- neon_load_reg64(cpu_V0, rm + pass);
- tcg_gen_movi_i64(cpu_V1, imm);
- switch (op) {
- case 6: /* VQSHLU */
- gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- break;
- case 7: /* VQSHL */
- if (u) {
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- } else {
- gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- }
- break;
- default:
- g_assert_not_reached();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- } else { /* size < 3 */
- /* Operands in T0 and T1. */
- tmp = neon_load_reg(rm, pass);
- tmp2 = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp2, imm);
- switch (op) {
- case 6: /* VQSHLU */
- switch (size) {
- case 0:
- gen_helper_neon_qshlu_s8(tmp, cpu_env,
- tmp, tmp2);
- break;
- case 1:
- gen_helper_neon_qshlu_s16(tmp, cpu_env,
- tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_qshlu_s32(tmp, cpu_env,
- tmp, tmp2);
- break;
- default:
- abort();
- }
- break;
- case 7: /* VQSHL */
- GEN_NEON_INTEGER_OP_ENV(qshl);
- break;
- default:
- g_assert_not_reached();
- }
- tcg_temp_free_i32(tmp2);
- neon_store_reg(rd, pass, tmp);
- }
- } /* for pass */
- } else if (op < 10) {
- /* Shift by immediate and narrow:
- VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
- int input_unsigned = (op == 8) ? !u : u;
- if (rm & 1) {
- return 1;
- }
- shift = shift - (1 << (size + 3));
- size++;
- if (size == 3) {
- tmp64 = tcg_const_i64(shift);
- neon_load_reg64(cpu_V0, rm);
- neon_load_reg64(cpu_V1, rm + 1);
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 in;
- if (pass == 0) {
- in = cpu_V0;
- } else {
- in = cpu_V1;
- }
- if (q) {
- if (input_unsigned) {
- gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
- } else {
- gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
- }
- } else {
- if (input_unsigned) {
- gen_ushl_i64(cpu_V0, in, tmp64);
- } else {
- gen_sshl_i64(cpu_V0, in, tmp64);
- }
- }
- tmp = tcg_temp_new_i32();
- gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
- neon_store_reg(rd, pass, tmp);
- } /* for pass */
- tcg_temp_free_i64(tmp64);
- } else {
- if (size == 1) {
- imm = (uint16_t)shift;
- imm |= imm << 16;
- } else {
- /* size == 2 */
- imm = (uint32_t)shift;
- }
- tmp2 = tcg_const_i32(imm);
- tmp4 = neon_load_reg(rm + 1, 0);
- tmp5 = neon_load_reg(rm + 1, 1);
- for (pass = 0; pass < 2; pass++) {
- if (pass == 0) {
- tmp = neon_load_reg(rm, 0);
- } else {
- tmp = tmp4;
- }
- gen_neon_shift_narrow(size, tmp, tmp2, q,
- input_unsigned);
- if (pass == 0) {
- tmp3 = neon_load_reg(rm, 1);
- } else {
- tmp3 = tmp5;
- }
- gen_neon_shift_narrow(size, tmp3, tmp2, q,
- input_unsigned);
- tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(tmp3);
- tmp = tcg_temp_new_i32();
- gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
- neon_store_reg(rd, pass, tmp);
- } /* for pass */
- tcg_temp_free_i32(tmp2);
- }
- } else if (op == 10) {
- /* VSHLL, VMOVL */
- if (q || (rd & 1)) {
- return 1;
- }
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- for (pass = 0; pass < 2; pass++) {
- if (pass == 1)
- tmp = tmp2;
-
- gen_neon_widen(cpu_V0, tmp, size, u);
-
- if (shift != 0) {
- /* The shift is less than the width of the source
- type, so we can just shift the whole register. */
- tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
- /* Widen the result of shift: we need to clear
- * the potential overflow bits resulting from
- * left bits of the narrow input appearing as
- * right bits of left the neighbour narrow
- * input. */
- if (size < 2 || !u) {
- uint64_t imm64;
- if (size == 0) {
- imm = (0xffu >> (8 - shift));
- imm |= imm << 16;
- } else if (size == 1) {
- imm = 0xffff >> (16 - shift);
- } else {
- /* size == 2 */
- imm = 0xffffffff >> (32 - shift);
- }
- if (size < 2) {
- imm64 = imm | (((uint64_t)imm) << 32);
- } else {
- imm64 = imm;
- }
- tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
- }
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- } else if (op >= 14) {
- /* VCVT fixed-point. */
- TCGv_ptr fpst;
- TCGv_i32 shiftv;
- VFPGenFixPointFn *fn;
-
- if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
- return 1;
- }
-
- if (!(op & 1)) {
- if (u) {
- fn = gen_helper_vfp_ultos;
- } else {
- fn = gen_helper_vfp_sltos;
- }
- } else {
- if (u) {
- fn = gen_helper_vfp_touls_round_to_zero;
- } else {
- fn = gen_helper_vfp_tosls_round_to_zero;
- }
- }
+ tcg_gen_smin_vec(vece, t, a, b);
+ tcg_gen_smax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
- /* We have already masked out the must-be-1 top bit of imm6,
- * hence this 32-shift where the ARM ARM has 64-imm6.
- */
- shift = 32 - shift;
- fpst = get_fpstatus_ptr(1);
- shiftv = tcg_const_i32(shift);
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- TCGv_i32 tmpf = neon_load_reg(rm, pass);
- fn(tmpf, tmpf, shiftv, fpst);
- neon_store_reg(rd, pass, tmpf);
- }
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(shiftv);
- } else {
- return 1;
- }
- } else { /* (insn & 0x00380080) == 0 */
- int invert, reg_ofs, vec_size;
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sabd_i32,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sabd_i64,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
- if (q && (rd & 1)) {
- return 1;
- }
+static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
- op = (insn >> 8) & 0xf;
- /* One register and immediate. */
- imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
- invert = (insn & (1 << 5)) != 0;
- /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
- * We choose to not special-case this and will behave as if a
- * valid constant encoding of 0 had been given.
- */
- switch (op) {
- case 0: case 1:
- /* no-op */
- break;
- case 2: case 3:
- imm <<= 8;
- break;
- case 4: case 5:
- imm <<= 16;
- break;
- case 6: case 7:
- imm <<= 24;
- break;
- case 8: case 9:
- imm |= imm << 16;
- break;
- case 10: case 11:
- imm = (imm << 8) | (imm << 24);
- break;
- case 12:
- imm = (imm << 8) | 0xff;
- break;
- case 13:
- imm = (imm << 16) | 0xffff;
- break;
- case 14:
- imm |= (imm << 8) | (imm << 16) | (imm << 24);
- if (invert) {
- imm = ~imm;
- }
- break;
- case 15:
- if (invert) {
- return 1;
- }
- imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
- | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
- break;
- }
- if (invert) {
- imm = ~imm;
- }
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
+ tcg_temp_free_i32(t);
+}
- reg_ofs = neon_reg_offset(rd, 0);
- vec_size = q ? 16 : 8;
+static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
- if (op & 1 && op < 12) {
- if (invert) {
- /* The immediate value has already been inverted,
- * so BIC becomes AND.
- */
- tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
- vec_size, vec_size);
- } else {
- tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
- vec_size, vec_size);
- }
- } else {
- /* VMOV, VMVN. */
- if (op == 14 && invert) {
- TCGv_i64 t64 = tcg_temp_new_i64();
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
+ tcg_temp_free_i64(t);
+}
- for (pass = 0; pass <= q; ++pass) {
- uint64_t val = 0;
- int n;
+static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
- for (n = 0; n < 8; n++) {
- if (imm & (1 << (n + pass * 8))) {
- val |= 0xffull << (n * 8);
- }
- }
- tcg_gen_movi_i64(t64, val);
- neon_store_reg64(t64, rd + pass);
- }
- tcg_temp_free_i64(t64);
- } else {
- tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
- vec_size, imm);
- }
- }
- }
- } else { /* (insn & 0x00800010 == 0x00800000) */
- if (size != 3) {
- op = (insn >> 8) & 0xf;
- if ((insn & (1 << 6)) == 0) {
- /* Three registers of different lengths. */
- int src1_wide;
- int src2_wide;
- int prewiden;
- /* undefreq: bit 0 : UNDEF if size == 0
- * bit 1 : UNDEF if size == 1
- * bit 2 : UNDEF if size == 2
- * bit 3 : UNDEF if U == 1
- * Note that [2:0] set implies 'always UNDEF'
- */
- int undefreq;
- /* prewiden, src1_wide, src2_wide, undefreq */
- static const int neon_3reg_wide[16][4] = {
- {1, 0, 0, 0}, /* VADDL */
- {1, 1, 0, 0}, /* VADDW */
- {1, 0, 0, 0}, /* VSUBL */
- {1, 1, 0, 0}, /* VSUBW */
- {0, 1, 1, 0}, /* VADDHN */
- {0, 0, 0, 0}, /* VABAL */
- {0, 1, 1, 0}, /* VSUBHN */
- {0, 0, 0, 0}, /* VABDL */
- {0, 0, 0, 0}, /* VMLAL */
- {0, 0, 0, 9}, /* VQDMLAL */
- {0, 0, 0, 0}, /* VMLSL */
- {0, 0, 0, 9}, /* VQDMLSL */
- {0, 0, 0, 0}, /* Integer VMULL */
- {0, 0, 0, 9}, /* VQDMULL */
- {0, 0, 0, 0xa}, /* Polynomial VMULL */
- {0, 0, 0, 7}, /* Reserved: always UNDEF */
- };
-
- prewiden = neon_3reg_wide[op][0];
- src1_wide = neon_3reg_wide[op][1];
- src2_wide = neon_3reg_wide[op][2];
- undefreq = neon_3reg_wide[op][3];
-
- if ((undefreq & (1 << size)) ||
- ((undefreq & 8) && u)) {
- return 1;
- }
- if ((src1_wide && (rn & 1)) ||
- (src2_wide && (rm & 1)) ||
- (!src2_wide && (rd & 1))) {
- return 1;
- }
+ tcg_gen_umin_vec(vece, t, a, b);
+ tcg_gen_umax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
- /* Handle polynomial VMULL in a single pass. */
- if (op == 14) {
- if (size == 0) {
- /* VMULL.P8 */
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
- 0, gen_helper_neon_pmull_h);
- } else {
- /* VMULL.P64 */
- if (!dc_isar_feature(aa32_pmull, s)) {
- return 1;
- }
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
- 0, gen_helper_gvec_pmull_q);
- }
- return 0;
- }
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_uabd_i32,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_uabd_i64,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
- /* Avoid overlapping operands. Wide source operands are
- always aligned so will never overlap with wide
- destinations in problematic ways. */
- if (rd == rm && !src2_wide) {
- tmp = neon_load_reg(rm, 1);
- neon_store_scratch(2, tmp);
- } else if (rd == rn && !src1_wide) {
- tmp = neon_load_reg(rn, 1);
- neon_store_scratch(2, tmp);
- }
- tmp3 = NULL;
- for (pass = 0; pass < 2; pass++) {
- if (src1_wide) {
- neon_load_reg64(cpu_V0, rn + pass);
- tmp = NULL;
- } else {
- if (pass == 1 && rd == rn) {
- tmp = neon_load_scratch(2);
- } else {
- tmp = neon_load_reg(rn, pass);
- }
- if (prewiden) {
- gen_neon_widen(cpu_V0, tmp, size, u);
- }
- }
- if (src2_wide) {
- neon_load_reg64(cpu_V1, rm + pass);
- tmp2 = NULL;
- } else {
- if (pass == 1 && rd == rm) {
- tmp2 = neon_load_scratch(2);
- } else {
- tmp2 = neon_load_reg(rm, pass);
- }
- if (prewiden) {
- gen_neon_widen(cpu_V1, tmp2, size, u);
- }
- }
- switch (op) {
- case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
- gen_neon_addl(size);
- break;
- case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
- gen_neon_subl(size);
- break;
- case 5: case 7: /* VABAL, VABDL */
- switch ((size << 1) | u) {
- case 0:
- gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
- break;
- case 1:
- gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
- break;
- case 3:
- gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
- break;
- case 4:
- gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
- break;
- case 5:
- gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
- break;
- default: abort();
- }
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- break;
- case 8: case 9: case 10: case 11: case 12: case 13:
- /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
- break;
- default: /* 15 is RESERVED: caught earlier */
- abort();
- }
- if (op == 13) {
- /* VQDMULL */
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- neon_store_reg64(cpu_V0, rd + pass);
- } else if (op == 5 || (op >= 8 && op <= 11)) {
- /* Accumulate. */
- neon_load_reg64(cpu_V1, rd + pass);
- switch (op) {
- case 10: /* VMLSL */
- gen_neon_negl(cpu_V0, size);
- /* Fall through */
- case 5: case 8: /* VABAL, VMLAL */
- gen_neon_addl(size);
- break;
- case 9: case 11: /* VQDMLAL, VQDMLSL */
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- if (op == 11) {
- gen_neon_negl(cpu_V0, size);
- }
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- } else if (op == 4 || op == 6) {
- /* Narrowing operation. */
- tmp = tcg_temp_new_i32();
- if (!u) {
- switch (size) {
- case 0:
- gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
- break;
- case 1:
- gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
- break;
- case 2:
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
- break;
- default: abort();
- }
- } else {
- switch (size) {
- case 0:
- gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
- break;
- case 1:
- gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
- break;
- case 2:
- tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
- break;
- default: abort();
- }
- }
- if (pass == 0) {
- tmp3 = tmp;
- } else {
- neon_store_reg(rd, 0, tmp3);
- neon_store_reg(rd, 1, tmp);
- }
- } else {
- /* Write back the result. */
- neon_store_reg64(cpu_V0, rd + pass);
- }
- }
- } else {
- /* Two registers and a scalar. NB that for ops of this form
- * the ARM ARM labels bit 24 as Q, but it is in our variable
- * 'u', not 'q'.
- */
- if (size == 0) {
- return 1;
- }
- switch (op) {
- case 1: /* Float VMLA scalar */
- case 5: /* Floating point VMLS scalar */
- case 9: /* Floating point VMUL scalar */
- if (size == 1) {
- return 1;
- }
- /* fall through */
- case 0: /* Integer VMLA scalar */
- case 4: /* Integer VMLS scalar */
- case 8: /* Integer VMUL scalar */
- case 12: /* VQDMULH scalar */
- case 13: /* VQRDMULH scalar */
- if (u && ((rd | rn) & 1)) {
- return 1;
- }
- tmp = neon_get_scalar(size, rm);
- neon_store_scratch(0, tmp);
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
- tmp = neon_load_scratch(0);
- tmp2 = neon_load_reg(rn, pass);
- if (op == 12) {
- if (size == 1) {
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
- } else {
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
- }
- } else if (op == 13) {
- if (size == 1) {
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
- } else {
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
- }
- } else if (op & 1) {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- } else {
- switch (size) {
- case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- }
- tcg_temp_free_i32(tmp2);
- if (op < 8) {
- /* Accumulate. */
- tmp2 = neon_load_reg(rd, pass);
- switch (op) {
- case 0:
- gen_neon_add(size, tmp, tmp2);
- break;
- case 1:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case 4:
- gen_neon_rsb(size, tmp, tmp2);
- break;
- case 5:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- default:
- abort();
- }
- tcg_temp_free_i32(tmp2);
- }
- neon_store_reg(rd, pass, tmp);
- }
- break;
- case 3: /* VQDMLAL scalar */
- case 7: /* VQDMLSL scalar */
- case 11: /* VQDMULL scalar */
- if (u == 1) {
- return 1;
- }
- /* fall through */
- case 2: /* VMLAL sclar */
- case 6: /* VMLSL scalar */
- case 10: /* VMULL scalar */
- if (rd & 1) {
- return 1;
- }
- tmp2 = neon_get_scalar(size, rm);
- /* We need a copy of tmp2 because gen_neon_mull
- * deletes it during pass 0. */
- tmp4 = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp4, tmp2);
- tmp3 = neon_load_reg(rn, 1);
-
- for (pass = 0; pass < 2; pass++) {
- if (pass == 0) {
- tmp = neon_load_reg(rn, 0);
- } else {
- tmp = tmp3;
- tmp2 = tmp4;
- }
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
- if (op != 11) {
- neon_load_reg64(cpu_V1, rd + pass);
- }
- switch (op) {
- case 6:
- gen_neon_negl(cpu_V0, size);
- /* Fall through */
- case 2:
- gen_neon_addl(size);
- break;
- case 3: case 7:
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- if (op == 7) {
- gen_neon_negl(cpu_V0, size);
- }
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
- break;
- case 10:
- /* no-op */
- break;
- case 11:
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- break;
- case 14: /* VQRDMLAH scalar */
- case 15: /* VQRDMLSH scalar */
- {
- NeonGenThreeOpEnvFn *fn;
+static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ gen_sabd_i32(t, a, b);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
- if (!dc_isar_feature(aa32_rdm, s)) {
- return 1;
- }
- if (u && ((rd | rn) & 1)) {
- return 1;
- }
- if (op == 14) {
- if (size == 1) {
- fn = gen_helper_neon_qrdmlah_s16;
- } else {
- fn = gen_helper_neon_qrdmlah_s32;
- }
- } else {
- if (size == 1) {
- fn = gen_helper_neon_qrdmlsh_s16;
- } else {
- fn = gen_helper_neon_qrdmlsh_s32;
- }
- }
+static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_sabd_i64(t, a, b);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
- tmp2 = neon_get_scalar(size, rm);
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
- tmp = neon_load_reg(rn, pass);
- tmp3 = neon_load_reg(rd, pass);
- fn(tmp, cpu_env, tmp, tmp2, tmp3);
- tcg_temp_free_i32(tmp3);
- neon_store_reg(rd, pass, tmp);
- }
- tcg_temp_free_i32(tmp2);
- }
- break;
- default:
- g_assert_not_reached();
- }
- }
- } else { /* size == 3 */
- if (!u) {
- /* Extract. */
- imm = (insn >> 8) & 0xf;
+static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ gen_sabd_vec(vece, t, a, b);
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
- if (imm > 7 && !q)
- return 1;
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_add_vec,
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_saba_i32,
+ .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_saba_i64,
+ .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
- if (q && ((rd | rn | rm) & 1)) {
- return 1;
- }
+static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ gen_uabd_i32(t, a, b);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
- if (imm == 0) {
- neon_load_reg64(cpu_V0, rn);
- if (q) {
- neon_load_reg64(cpu_V1, rn + 1);
- }
- } else if (imm == 8) {
- neon_load_reg64(cpu_V0, rn + 1);
- if (q) {
- neon_load_reg64(cpu_V1, rm);
- }
- } else if (q) {
- tmp64 = tcg_temp_new_i64();
- if (imm < 8) {
- neon_load_reg64(cpu_V0, rn);
- neon_load_reg64(tmp64, rn + 1);
- } else {
- neon_load_reg64(cpu_V0, rn + 1);
- neon_load_reg64(tmp64, rm);
- }
- tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
- tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- if (imm < 8) {
- neon_load_reg64(cpu_V1, rm);
- } else {
- neon_load_reg64(cpu_V1, rm + 1);
- imm -= 8;
- }
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
- tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
- tcg_temp_free_i64(tmp64);
- } else {
- /* BUGFIX */
- neon_load_reg64(cpu_V0, rn);
- tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
- neon_load_reg64(cpu_V1, rm);
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- }
- neon_store_reg64(cpu_V0, rd);
- if (q) {
- neon_store_reg64(cpu_V1, rd + 1);
- }
+static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_uabd_i64(t, a, b);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ gen_uabd_vec(vece, t, a, b);
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_add_vec,
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_uaba_i32,
+ .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_uaba_i64,
+ .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+/* Translate a NEON data processing instruction. Return nonzero if the
+ instruction is invalid.
+ We process data in a mixture of 32-bit and 64-bit chunks.
+ Mostly we use 32-bit chunks so we can use normal scalar instructions. */
+
+static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
+{
+ int op;
+ int q;
+ int rd, rm;
+ int size;
+ int pass;
+ int u;
+ TCGv_i32 tmp, tmp2;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return 1;
+ }
+
+ /* FIXME: this access check should not take precedence over UNDEF
+ * for invalid encodings; we will generate incorrect syndrome information
+ * for attempts to execute invalid vfp/neon encodings with FP disabled.
+ */
+ if (s->fp_excp_el) {
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+ syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
+ return 0;
+ }
+
+ if (!s->vfp_enabled)
+ return 1;
+ q = (insn & (1 << 6)) != 0;
+ u = (insn >> 24) & 1;
+ VFP_DREG_D(rd, insn);
+ VFP_DREG_M(rm, insn);
+ size = (insn >> 20) & 3;
+
+ if ((insn & (1 << 23)) == 0) {
+ /* Three register same length: handled by decodetree */
+ return 1;
+ } else if (insn & (1 << 4)) {
+ /* Two registers and shift or reg and imm: handled by decodetree */
+ return 1;
+ } else { /* (insn & 0x00800010 == 0x00800000) */
+ if (size != 3) {
+ /*
+ * Three registers of different lengths, or two registers and
+ * a scalar: handled by decodetree
+ */
+ return 1;
+ } else { /* size == 3 */
+ if (!u) {
+ /* Extract: handled by decodetree */
+ return 1;
} else if ((insn & (1 << 11)) == 0) {
/* Two register misc. */
op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
!arm_dc_feature(s, ARM_FEATURE_V8)) {
return 1;
}
- if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
- q && ((rm | rd) & 1)) {
+ if (q && ((rm | rd) & 1)) {
return 1;
}
switch (op) {
case NEON_2RM_VREV64:
- for (pass = 0; pass < (q ? 2 : 1); pass++) {
- tmp = neon_load_reg(rm, pass * 2);
- tmp2 = neon_load_reg(rm, pass * 2 + 1);
- switch (size) {
- case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
- case 1: gen_swap_half(tmp); break;
- case 2: /* no-op */ break;
- default: abort();
- }
- neon_store_reg(rd, pass * 2 + 1, tmp);
- if (size == 2) {
- neon_store_reg(rd, pass * 2, tmp2);
- } else {
- switch (size) {
- case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
- case 1: gen_swap_half(tmp2); break;
- default: abort();
- }
- neon_store_reg(rd, pass * 2, tmp2);
- }
- }
- break;
case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
- for (pass = 0; pass < q + 1; pass++) {
- tmp = neon_load_reg(rm, pass * 2);
- gen_neon_widen(cpu_V0, tmp, size, op & 1);
- tmp = neon_load_reg(rm, pass * 2 + 1);
- gen_neon_widen(cpu_V1, tmp, size, op & 1);
- switch (size) {
- case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
- case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
- case 2: tcg_gen_add_i64(CPU_V001); break;
- default: abort();
- }
- if (op >= NEON_2RM_VPADAL) {
- /* Accumulate. */
- neon_load_reg64(cpu_V1, rd + pass);
- gen_neon_addl(size);
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- break;
- case NEON_2RM_VTRN:
- if (size == 2) {
- int n;
- for (n = 0; n < (q ? 4 : 2); n += 2) {
- tmp = neon_load_reg(rm, n);
- tmp2 = neon_load_reg(rd, n + 1);
- neon_store_reg(rm, n, tmp2);
- neon_store_reg(rd, n + 1, tmp);
- }
- } else {
- goto elementwise;
- }
- break;
case NEON_2RM_VUZP:
- if (gen_neon_unzip(rd, rm, size, q)) {
- return 1;
- }
- break;
case NEON_2RM_VZIP:
- if (gen_neon_zip(rd, rm, size, q)) {
- return 1;
- }
- break;
case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
- /* also VQMOVUN; op field and mnemonics don't line up */
- if (rm & 1) {
- return 1;
- }
- tmp2 = NULL;
- for (pass = 0; pass < 2; pass++) {
- neon_load_reg64(cpu_V0, rm + pass);
- tmp = tcg_temp_new_i32();
- gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
- tmp, cpu_V0);
- if (pass == 0) {
- tmp2 = tmp;
- } else {
- neon_store_reg(rd, 0, tmp2);
- neon_store_reg(rd, 1, tmp);
- }
- }
- break;
case NEON_2RM_VSHLL:
- if (q || (rd & 1)) {
- return 1;
- }
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- for (pass = 0; pass < 2; pass++) {
- if (pass == 1)
- tmp = tmp2;
- gen_neon_widen(cpu_V0, tmp, size, 1);
- tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
- neon_store_reg64(cpu_V0, rd + pass);
- }
- break;
case NEON_2RM_VCVT_F16_F32:
- {
- TCGv_ptr fpst;
- TCGv_i32 ahp;
-
- if (!dc_isar_feature(aa32_fp16_spconv, s) ||
- q || (rm & 1)) {
- return 1;
- }
- fpst = get_fpstatus_ptr(true);
- ahp = get_ahp_flag();
- tmp = neon_load_reg(rm, 0);
- gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
- tmp2 = neon_load_reg(rm, 1);
- gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
- tcg_gen_shli_i32(tmp2, tmp2, 16);
- tcg_gen_or_i32(tmp2, tmp2, tmp);
- tcg_temp_free_i32(tmp);
- tmp = neon_load_reg(rm, 2);
- gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
- tmp3 = neon_load_reg(rm, 3);
- neon_store_reg(rd, 0, tmp2);
- gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
- tcg_gen_shli_i32(tmp3, tmp3, 16);
- tcg_gen_or_i32(tmp3, tmp3, tmp);
- neon_store_reg(rd, 1, tmp3);
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
- break;
- }
case NEON_2RM_VCVT_F32_F16:
- {
- TCGv_ptr fpst;
- TCGv_i32 ahp;
- if (!dc_isar_feature(aa32_fp16_spconv, s) ||
- q || (rd & 1)) {
- return 1;
- }
- fpst = get_fpstatus_ptr(true);
- ahp = get_ahp_flag();
- tmp3 = tcg_temp_new_i32();
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- tcg_gen_ext16u_i32(tmp3, tmp);
- gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
- neon_store_reg(rd, 0, tmp3);
- tcg_gen_shri_i32(tmp, tmp, 16);
- gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
- neon_store_reg(rd, 1, tmp);
- tmp3 = tcg_temp_new_i32();
- tcg_gen_ext16u_i32(tmp3, tmp2);
- gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
- neon_store_reg(rd, 2, tmp3);
- tcg_gen_shri_i32(tmp2, tmp2, 16);
- gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
- neon_store_reg(rd, 3, tmp2);
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
- break;
- }
- case NEON_2RM_AESE: case NEON_2RM_AESMC:
- if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
-
- /* Bit 6 is the lowest opcode bit; it distinguishes between
- * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
- */
- tmp3 = tcg_const_i32(extract32(insn, 6, 1));
-
- if (op == NEON_2RM_AESE) {
- gen_helper_crypto_aese(ptr1, ptr2, tmp3);
- } else {
- gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
- }
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- tcg_temp_free_i32(tmp3);
- break;
- case NEON_2RM_SHA1H:
- if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
-
- gen_helper_crypto_sha1h(ptr1, ptr2);
-
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- break;
- case NEON_2RM_SHA1SU1:
- if ((rm | rd) & 1) {
- return 1;
- }
- /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
- if (q) {
- if (!dc_isar_feature(aa32_sha2, s)) {
- return 1;
- }
- } else if (!dc_isar_feature(aa32_sha1, s)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
- if (q) {
- gen_helper_crypto_sha256su0(ptr1, ptr2);
- } else {
- gen_helper_crypto_sha1su1(ptr1, ptr2);
- }
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- break;
-
case NEON_2RM_VMVN:
- tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
- break;
case NEON_2RM_VNEG:
- tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
- break;
case NEON_2RM_VABS:
- tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
- break;
-
case NEON_2RM_VCEQ0:
- gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
- break;
case NEON_2RM_VCGT0:
- gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
- break;
case NEON_2RM_VCLE0:
- gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
- break;
case NEON_2RM_VCGE0:
- gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
- break;
case NEON_2RM_VCLT0:
- gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
+ case NEON_2RM_AESE: case NEON_2RM_AESMC:
+ case NEON_2RM_SHA1H:
+ case NEON_2RM_SHA1SU1:
+ /* handled by decodetree */
+ return 1;
+ case NEON_2RM_VTRN:
+ if (size == 2) {
+ int n;
+ for (n = 0; n < (q ? 4 : 2); n += 2) {
+ tmp = neon_load_reg(rm, n);
+ tmp2 = neon_load_reg(rd, n + 1);
+ neon_store_reg(rm, n, tmp2);
+ neon_store_reg(rd, n + 1, tmp);
+ }
+ } else {
+ goto elementwise;
+ }
break;
default:
case NEON_2RM_VREV32:
switch (size) {
case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
- case 1: gen_swap_half(tmp); break;
+ case 1: gen_swap_half(tmp, tmp); break;
default: abort();
}
break;
}
break;
}
- } else if ((insn & (1 << 10)) == 0) {
- /* VTBL, VTBX. */
- int n = ((insn >> 8) & 3) + 1;
- if ((rn + n) > 32) {
- /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
- * helper function running off the end of the register file.
- */
- return 1;
- }
- n <<= 3;
- if (insn & (1 << 6)) {
- tmp = neon_load_reg(rd, 0);
- } else {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- }
- tmp2 = neon_load_reg(rm, 0);
- ptr1 = vfp_reg_ptr(true, rn);
- tmp5 = tcg_const_i32(n);
- gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
- tcg_temp_free_i32(tmp);
- if (insn & (1 << 6)) {
- tmp = neon_load_reg(rd, 1);
- } else {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- }
- tmp3 = neon_load_reg(rm, 1);
- gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
- tcg_temp_free_i32(tmp5);
- tcg_temp_free_ptr(ptr1);
- neon_store_reg(rd, 0, tmp2);
- neon_store_reg(rd, 1, tmp3);
- tcg_temp_free_i32(tmp);
- } else if ((insn & 0x380) == 0) {
- /* VDUP */
- int element;
- MemOp size;
-
- if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
- return 1;
- }
- if (insn & (1 << 16)) {
- size = MO_8;
- element = (insn >> 17) & 7;
- } else if (insn & (1 << 17)) {
- size = MO_16;
- element = (insn >> 18) & 3;
- } else {
- size = MO_32;
- element = (insn >> 19) & 1;
- }
- tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
- neon_element_offset(rm, element, size),
- q ? 16 : 8, q ? 16 : 8);
} else {
+ /* VTBL, VTBX, VDUP: handled by decodetree */
return 1;
}
}
t1 = load_reg(s, a->rn);
t2 = load_reg(s, a->rm);
if (m_swap) {
- gen_swap_half(t2);
+ gen_swap_half(t2, t2);
}
gen_smul_dual(t1, t2);
t1 = load_reg(s, a->rn);
t2 = load_reg(s, a->rm);
if (m_swap) {
- gen_swap_half(t2);
+ gen_swap_half(t2, t2);
}
gen_smul_dual(t1, t2);