X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=target-arm%2Ftranslate.c;h=c7961b809711100588bb23cbeea1cae48a138bae;hb=4ec648dd6ed97182d123e0df5bc5012c734aa858;hp=5ffbace5ae6f88256683ae45fa6b0c649a966a84;hpb=25f84f79481db5363c638dd95d5c2a0a0e430cee;p=qemu.git diff --git a/target-arm/translate.c b/target-arm/translate.c index 5ffbace5a..c7961b809 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -25,14 +25,13 @@ #include #include "cpu.h" -#include "exec-all.h" #include "disas.h" #include "tcg-op.h" #include "qemu-log.h" -#include "helpers.h" +#include "helper.h" #define GEN_HELPER 1 -#include "helpers.h" +#include "helper.h" #define ENABLE_ARCH_4T arm_feature(env, ARM_FEATURE_V4T) #define ENABLE_ARCH_5 arm_feature(env, ARM_FEATURE_V5) @@ -129,7 +128,7 @@ void arm_translate_init(void) #endif #define GEN_HELPER 2 -#include "helpers.h" +#include "helper.h" } static inline TCGv load_cpu_offset(int offset) @@ -893,13 +892,29 @@ static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn, } } +static TCGv_ptr get_fpstatus_ptr(int neon) +{ + TCGv_ptr statusptr = tcg_temp_new_ptr(); + int offset; + if (neon) { + offset = offsetof(CPUState, vfp.standard_fp_status); + } else { + offset = offsetof(CPUState, vfp.fp_status); + } + tcg_gen_addi_ptr(statusptr, cpu_env, offset); + return statusptr; +} + #define VFP_OP2(name) \ static inline void gen_vfp_##name(int dp) \ { \ - if (dp) \ - gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, cpu_env); \ - else \ - gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, cpu_env); \ + TCGv_ptr fpst = get_fpstatus_ptr(0); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst); \ + } \ + tcg_temp_free_ptr(fpst); \ } VFP_OP2(add) @@ -909,6 +924,28 @@ VFP_OP2(div) #undef VFP_OP2 +static inline void gen_vfp_F1_mul(int dp) +{ + /* Like gen_vfp_mul() but put result in F1 */ + TCGv_ptr fpst = get_fpstatus_ptr(0); + if (dp) { + gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst); + } else { + gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst); + } + tcg_temp_free_ptr(fpst); +} + +static inline void gen_vfp_F1_neg(int dp) +{ + /* Like gen_vfp_neg() but put result in F1 */ + if (dp) { + gen_helper_vfp_negd(cpu_F1d, cpu_F0d); + } else { + gen_helper_vfp_negs(cpu_F1s, cpu_F0s); + } +} + static inline void gen_vfp_abs(int dp) { if (dp) @@ -957,63 +994,52 @@ static inline void gen_vfp_F1_ld0(int dp) tcg_gen_movi_i32(cpu_F1s, 0); } -static inline void gen_vfp_uito(int dp) -{ - if (dp) - gen_helper_vfp_uitod(cpu_F0d, cpu_F0s, cpu_env); - else - gen_helper_vfp_uitos(cpu_F0s, cpu_F0s, cpu_env); -} - -static inline void gen_vfp_sito(int dp) -{ - if (dp) - gen_helper_vfp_sitod(cpu_F0d, cpu_F0s, cpu_env); - else - gen_helper_vfp_sitos(cpu_F0s, cpu_F0s, cpu_env); -} - -static inline void gen_vfp_toui(int dp) -{ - if (dp) - gen_helper_vfp_touid(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_touis(cpu_F0s, cpu_F0s, cpu_env); +#define VFP_GEN_ITOF(name) \ +static inline void gen_vfp_##name(int dp, int neon) \ +{ \ + TCGv_ptr statusptr = get_fpstatus_ptr(neon); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \ + } \ + tcg_temp_free_ptr(statusptr); \ } -static inline void gen_vfp_touiz(int dp) -{ - if (dp) - gen_helper_vfp_touizd(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_touizs(cpu_F0s, cpu_F0s, cpu_env); -} +VFP_GEN_ITOF(uito) +VFP_GEN_ITOF(sito) +#undef VFP_GEN_ITOF -static inline void gen_vfp_tosi(int dp) -{ - if (dp) - gen_helper_vfp_tosid(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_tosis(cpu_F0s, cpu_F0s, cpu_env); +#define VFP_GEN_FTOI(name) \ +static inline void gen_vfp_##name(int dp, int neon) \ +{ \ + TCGv_ptr statusptr = get_fpstatus_ptr(neon); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \ + } \ + tcg_temp_free_ptr(statusptr); \ } -static inline void gen_vfp_tosiz(int dp) -{ - if (dp) - gen_helper_vfp_tosizd(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_tosizs(cpu_F0s, cpu_F0s, cpu_env); -} +VFP_GEN_FTOI(toui) +VFP_GEN_FTOI(touiz) +VFP_GEN_FTOI(tosi) +VFP_GEN_FTOI(tosiz) +#undef VFP_GEN_FTOI #define VFP_GEN_FIX(name) \ -static inline void gen_vfp_##name(int dp, int shift) \ +static inline void gen_vfp_##name(int dp, int shift, int neon) \ { \ TCGv tmp_shift = tcg_const_i32(shift); \ - if (dp) \ - gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, cpu_env);\ - else \ - gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, cpu_env);\ + TCGv_ptr statusptr = get_fpstatus_ptr(neon); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, statusptr); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, statusptr); \ + } \ tcg_temp_free_i32(tmp_shift); \ + tcg_temp_free_ptr(statusptr); \ } VFP_GEN_FIX(tosh) VFP_GEN_FIX(tosl) @@ -1177,15 +1203,22 @@ static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \ } -#define IWMMXT_OP_SIZE(name) \ -IWMMXT_OP(name##b) \ -IWMMXT_OP(name##w) \ -IWMMXT_OP(name##l) +#define IWMMXT_OP_ENV(name) \ +static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ +{ \ + iwmmxt_load_reg(cpu_V1, rn); \ + gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \ +} + +#define IWMMXT_OP_ENV_SIZE(name) \ +IWMMXT_OP_ENV(name##b) \ +IWMMXT_OP_ENV(name##w) \ +IWMMXT_OP_ENV(name##l) -#define IWMMXT_OP_1(name) \ +#define IWMMXT_OP_ENV1(name) \ static inline void gen_op_iwmmxt_##name##_M0(void) \ { \ - gen_helper_iwmmxt_##name(cpu_M0, cpu_M0); \ + gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \ } IWMMXT_OP(maddsq) @@ -1199,51 +1232,51 @@ IWMMXT_OP(muluhw) IWMMXT_OP(macsw) IWMMXT_OP(macuw) -IWMMXT_OP_SIZE(unpackl) -IWMMXT_OP_SIZE(unpackh) - -IWMMXT_OP_1(unpacklub) -IWMMXT_OP_1(unpackluw) -IWMMXT_OP_1(unpacklul) -IWMMXT_OP_1(unpackhub) -IWMMXT_OP_1(unpackhuw) -IWMMXT_OP_1(unpackhul) -IWMMXT_OP_1(unpacklsb) -IWMMXT_OP_1(unpacklsw) -IWMMXT_OP_1(unpacklsl) -IWMMXT_OP_1(unpackhsb) -IWMMXT_OP_1(unpackhsw) -IWMMXT_OP_1(unpackhsl) - -IWMMXT_OP_SIZE(cmpeq) -IWMMXT_OP_SIZE(cmpgtu) -IWMMXT_OP_SIZE(cmpgts) - -IWMMXT_OP_SIZE(mins) -IWMMXT_OP_SIZE(minu) -IWMMXT_OP_SIZE(maxs) -IWMMXT_OP_SIZE(maxu) - -IWMMXT_OP_SIZE(subn) -IWMMXT_OP_SIZE(addn) -IWMMXT_OP_SIZE(subu) -IWMMXT_OP_SIZE(addu) -IWMMXT_OP_SIZE(subs) -IWMMXT_OP_SIZE(adds) - -IWMMXT_OP(avgb0) -IWMMXT_OP(avgb1) -IWMMXT_OP(avgw0) -IWMMXT_OP(avgw1) +IWMMXT_OP_ENV_SIZE(unpackl) +IWMMXT_OP_ENV_SIZE(unpackh) + +IWMMXT_OP_ENV1(unpacklub) +IWMMXT_OP_ENV1(unpackluw) +IWMMXT_OP_ENV1(unpacklul) +IWMMXT_OP_ENV1(unpackhub) +IWMMXT_OP_ENV1(unpackhuw) +IWMMXT_OP_ENV1(unpackhul) +IWMMXT_OP_ENV1(unpacklsb) +IWMMXT_OP_ENV1(unpacklsw) +IWMMXT_OP_ENV1(unpacklsl) +IWMMXT_OP_ENV1(unpackhsb) +IWMMXT_OP_ENV1(unpackhsw) +IWMMXT_OP_ENV1(unpackhsl) + +IWMMXT_OP_ENV_SIZE(cmpeq) +IWMMXT_OP_ENV_SIZE(cmpgtu) +IWMMXT_OP_ENV_SIZE(cmpgts) + +IWMMXT_OP_ENV_SIZE(mins) +IWMMXT_OP_ENV_SIZE(minu) +IWMMXT_OP_ENV_SIZE(maxs) +IWMMXT_OP_ENV_SIZE(maxu) + +IWMMXT_OP_ENV_SIZE(subn) +IWMMXT_OP_ENV_SIZE(addn) +IWMMXT_OP_ENV_SIZE(subu) +IWMMXT_OP_ENV_SIZE(addu) +IWMMXT_OP_ENV_SIZE(subs) +IWMMXT_OP_ENV_SIZE(adds) + +IWMMXT_OP_ENV(avgb0) +IWMMXT_OP_ENV(avgb1) +IWMMXT_OP_ENV(avgw0) +IWMMXT_OP_ENV(avgw1) IWMMXT_OP(msadb) -IWMMXT_OP(packuw) -IWMMXT_OP(packul) -IWMMXT_OP(packuq) -IWMMXT_OP(packsw) -IWMMXT_OP(packsl) -IWMMXT_OP(packsq) +IWMMXT_OP_ENV(packuw) +IWMMXT_OP_ENV(packul) +IWMMXT_OP_ENV(packuq) +IWMMXT_OP_ENV(packsw) +IWMMXT_OP_ENV(packsl) +IWMMXT_OP_ENV(packsq) static void gen_op_iwmmxt_set_mup(void) { @@ -1331,7 +1364,7 @@ static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv dest) return 0; } -/* Disassemble an iwMMXt instruction. Returns nonzero if an error occured +/* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred (ie. an undefined instruction). */ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) { @@ -1977,13 +2010,13 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) } switch ((insn >> 22) & 3) { case 1: - gen_helper_iwmmxt_srlw(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp); break; case 2: - gen_helper_iwmmxt_srll(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp); break; case 3: - gen_helper_iwmmxt_srlq(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp); break; } tcg_temp_free_i32(tmp); @@ -2005,13 +2038,13 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) } switch ((insn >> 22) & 3) { case 1: - gen_helper_iwmmxt_sraw(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp); break; case 2: - gen_helper_iwmmxt_sral(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp); break; case 3: - gen_helper_iwmmxt_sraq(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp); break; } tcg_temp_free_i32(tmp); @@ -2033,13 +2066,13 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) } switch ((insn >> 22) & 3) { case 1: - gen_helper_iwmmxt_sllw(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp); break; case 2: - gen_helper_iwmmxt_slll(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp); break; case 3: - gen_helper_iwmmxt_sllq(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp); break; } tcg_temp_free_i32(tmp); @@ -2061,21 +2094,21 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp); return 1; } - gen_helper_iwmmxt_rorw(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp); break; case 2: if (gen_iwmmxt_shift(insn, 0x1f, tmp)) { tcg_temp_free_i32(tmp); return 1; } - gen_helper_iwmmxt_rorl(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp); break; case 3: if (gen_iwmmxt_shift(insn, 0x3f, tmp)) { tcg_temp_free_i32(tmp); return 1; } - gen_helper_iwmmxt_rorq(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp); break; } tcg_temp_free_i32(tmp); @@ -2209,7 +2242,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) rd0 = (insn >> 16) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f)); - gen_helper_iwmmxt_shufh(cpu_M0, cpu_M0, tmp); + gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp); tcg_temp_free(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); @@ -2335,7 +2368,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) return 0; } -/* Disassemble an XScale DSP instruction. Returns nonzero if an error occured +/* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred (ie. an undefined instruction). */ static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn) { @@ -2438,23 +2471,33 @@ static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn) return 0; } -static int cp15_user_ok(uint32_t insn) +static int cp15_user_ok(CPUState *env, uint32_t insn) { int cpn = (insn >> 16) & 0xf; int cpm = insn & 0xf; int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38); + if (arm_feature(env, ARM_FEATURE_V7) && cpn == 9) { + /* Performance monitor registers fall into three categories: + * (a) always UNDEF in usermode + * (b) UNDEF only if PMUSERENR.EN is 0 + * (c) always read OK and UNDEF on write (PMUSERENR only) + */ + if ((cpm == 12 && (op < 6)) || + (cpm == 13 && (op < 3))) { + return env->cp15.c9_pmuserenr; + } else if (cpm == 14 && op == 0 && (insn & ARM_CP_RW_BIT)) { + /* PMUSERENR, read only */ + return 1; + } + return 0; + } + if (cpn == 13 && cpm == 0) { /* TLS register. */ if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT))) return 1; } - if (cpn == 7) { - /* ISB, DSB, DMB. */ - if ((cpm == 5 && op == 4) - || (cpm == 10 && (op == 4 || op == 5))) - return 1; - } return 0; } @@ -2530,39 +2573,60 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn) /* cdp */ return 1; } - if (IS_USER(s) && !cp15_user_ok(insn)) { - return 1; - } - - /* Pre-v7 versions of the architecture implemented WFI via coprocessor - * instructions rather than a separate instruction. + /* We special case a number of cp15 instructions which were used + * for things which are real instructions in ARMv7. This allows + * them to work in linux-user mode which doesn't provide functional + * get_cp15/set_cp15 helpers, and is more efficient anyway. */ - if ((insn & 0x0fff0fff) == 0x0e070f90) { + switch ((insn & 0x0fff0fff)) { + case 0x0e070f90: /* 0,c7,c0,4: Standard v6 WFI (also used in some pre-v6 cores). * In v7, this must NOP. */ + if (IS_USER(s)) { + return 1; + } if (!arm_feature(env, ARM_FEATURE_V7)) { /* Wait for interrupt. */ gen_set_pc_im(s->pc); s->is_jmp = DISAS_WFI; } return 0; - } - - if ((insn & 0x0fff0fff) == 0x0e070f58) { + case 0x0e070f58: /* 0,c7,c8,2: Not all pre-v6 cores implemented this WFI, * so this is slightly over-broad. */ - if (!arm_feature(env, ARM_FEATURE_V6)) { + if (!IS_USER(s) && !arm_feature(env, ARM_FEATURE_V6)) { /* Wait for interrupt. */ gen_set_pc_im(s->pc); s->is_jmp = DISAS_WFI; return 0; } - /* Otherwise fall through to handle via helper function. + /* Otherwise continue to handle via helper function. * In particular, on v7 and some v6 cores this is one of * the VA-PA registers. */ + break; + case 0x0e070f3d: + /* 0,c7,c13,1: prefetch-by-MVA in v6, NOP in v7 */ + if (arm_feature(env, ARM_FEATURE_V6)) { + return IS_USER(s) ? 1 : 0; + } + break; + case 0x0e070f95: /* 0,c7,c5,4 : ISB */ + case 0x0e070f9a: /* 0,c7,c10,4: DSB */ + case 0x0e070fba: /* 0,c7,c10,5: DMB */ + /* Barriers in both v6 and v7 */ + if (arm_feature(env, ARM_FEATURE_V6)) { + return 0; + } + break; + default: + break; + } + + if (IS_USER(s) && !cp15_user_ok(env, insn)) { + return 1; } rd = (insn >> 12) & 0xf; @@ -2681,7 +2745,7 @@ static TCGv gen_load_and_replicate(DisasContext *s, TCGv addr, int size) return tmp; } -/* Disassemble a VFP instruction. Returns nonzero if an error occured +/* Disassemble a VFP instruction. Returns nonzero if an error occurred (ie. an undefined instruction). */ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) { @@ -3021,27 +3085,34 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) for (;;) { /* Perform the calculation. */ switch (op) { - case 0: /* mac: fd + (fn * fm) */ - gen_vfp_mul(dp); - gen_mov_F1_vreg(dp, rd); + case 0: /* VMLA: fd + (fn * fm) */ + /* Note that order of inputs to the add matters for NaNs */ + gen_vfp_F1_mul(dp); + gen_mov_F0_vreg(dp, rd); gen_vfp_add(dp); break; - case 1: /* nmac: fd - (fn * fm) */ + case 1: /* VMLS: fd + -(fn * fm) */ gen_vfp_mul(dp); - gen_vfp_neg(dp); - gen_mov_F1_vreg(dp, rd); + gen_vfp_F1_neg(dp); + gen_mov_F0_vreg(dp, rd); gen_vfp_add(dp); break; - case 2: /* msc: -fd + (fn * fm) */ - gen_vfp_mul(dp); - gen_mov_F1_vreg(dp, rd); - gen_vfp_sub(dp); + case 2: /* VNMLS: -fd + (fn * fm) */ + /* Note that it isn't valid to replace (-A + B) with (B - A) + * or similar plausible looking simplifications + * because this will give wrong results for NaNs. + */ + gen_vfp_F1_mul(dp); + gen_mov_F0_vreg(dp, rd); + gen_vfp_neg(dp); + gen_vfp_add(dp); break; - case 3: /* nmsc: -fd - (fn * fm) */ + case 3: /* VNMLA: -fd + -(fn * fm) */ gen_vfp_mul(dp); + gen_vfp_F1_neg(dp); + gen_mov_F0_vreg(dp, rd); gen_vfp_neg(dp); - gen_mov_F1_vreg(dp, rd); - gen_vfp_sub(dp); + gen_vfp_add(dp); break; case 4: /* mul: fn * fm */ gen_vfp_mul(dp); @@ -3156,62 +3227,62 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env); break; case 16: /* fuito */ - gen_vfp_uito(dp); + gen_vfp_uito(dp, 0); break; case 17: /* fsito */ - gen_vfp_sito(dp); + gen_vfp_sito(dp, 0); break; case 20: /* fshto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_shto(dp, 16 - rm); + gen_vfp_shto(dp, 16 - rm, 0); break; case 21: /* fslto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_slto(dp, 32 - rm); + gen_vfp_slto(dp, 32 - rm, 0); break; case 22: /* fuhto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_uhto(dp, 16 - rm); + gen_vfp_uhto(dp, 16 - rm, 0); break; case 23: /* fulto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_ulto(dp, 32 - rm); + gen_vfp_ulto(dp, 32 - rm, 0); break; case 24: /* ftoui */ - gen_vfp_toui(dp); + gen_vfp_toui(dp, 0); break; case 25: /* ftouiz */ - gen_vfp_touiz(dp); + gen_vfp_touiz(dp, 0); break; case 26: /* ftosi */ - gen_vfp_tosi(dp); + gen_vfp_tosi(dp, 0); break; case 27: /* ftosiz */ - gen_vfp_tosiz(dp); + gen_vfp_tosiz(dp, 0); break; case 28: /* ftosh */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_tosh(dp, 16 - rm); + gen_vfp_tosh(dp, 16 - rm, 0); break; case 29: /* ftosl */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_tosl(dp, 32 - rm); + gen_vfp_tosl(dp, 32 - rm, 0); break; case 30: /* ftouh */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_touh(dp, 16 - rm); + gen_vfp_touh(dp, 16 - rm, 0); break; case 31: /* ftoul */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_toul(dp, 32 - rm); + gen_vfp_toul(dp, 32 - rm, 0); break; default: /* undefined */ printf ("rn:%d\n", rn); @@ -3662,7 +3733,7 @@ static inline TCGv neon_get_scalar(int size, int reg) static int gen_neon_unzip(int rd, int rm, int size, int q) { TCGv tmp, tmp2; - if (size == 3 || (!q && size == 2)) { + if (!q && size == 2) { return 1; } tmp = tcg_const_i32(rd); @@ -3670,13 +3741,13 @@ static int gen_neon_unzip(int rd, int rm, int size, int q) if (q) { switch (size) { case 0: - gen_helper_neon_qunzip8(tmp, tmp2); + gen_helper_neon_qunzip8(cpu_env, tmp, tmp2); break; case 1: - gen_helper_neon_qunzip16(tmp, tmp2); + gen_helper_neon_qunzip16(cpu_env, tmp, tmp2); break; case 2: - gen_helper_neon_qunzip32(tmp, tmp2); + gen_helper_neon_qunzip32(cpu_env, tmp, tmp2); break; default: abort(); @@ -3684,10 +3755,10 @@ static int gen_neon_unzip(int rd, int rm, int size, int q) } else { switch (size) { case 0: - gen_helper_neon_unzip8(tmp, tmp2); + gen_helper_neon_unzip8(cpu_env, tmp, tmp2); break; case 1: - gen_helper_neon_unzip16(tmp, tmp2); + gen_helper_neon_unzip16(cpu_env, tmp, tmp2); break; default: abort(); @@ -3701,7 +3772,7 @@ static int gen_neon_unzip(int rd, int rm, int size, int q) static int gen_neon_zip(int rd, int rm, int size, int q) { TCGv tmp, tmp2; - if (size == 3 || (!q && size == 2)) { + if (!q && size == 2) { return 1; } tmp = tcg_const_i32(rd); @@ -3709,13 +3780,13 @@ static int gen_neon_zip(int rd, int rm, int size, int q) if (q) { switch (size) { case 0: - gen_helper_neon_qzip8(tmp, tmp2); + gen_helper_neon_qzip8(cpu_env, tmp, tmp2); break; case 1: - gen_helper_neon_qzip16(tmp, tmp2); + gen_helper_neon_qzip16(cpu_env, tmp, tmp2); break; case 2: - gen_helper_neon_qzip32(tmp, tmp2); + gen_helper_neon_qzip32(cpu_env, tmp, tmp2); break; default: abort(); @@ -3723,10 +3794,10 @@ static int gen_neon_zip(int rd, int rm, int size, int q) } else { switch (size) { case 0: - gen_helper_neon_zip8(tmp, tmp2); + gen_helper_neon_zip8(cpu_env, tmp, tmp2); break; case 1: - gen_helper_neon_zip16(tmp, tmp2); + gen_helper_neon_zip16(cpu_env, tmp, tmp2); break; default: abort(); @@ -3830,6 +3901,21 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) size = (insn >> 6) & 3; if (op > 10) return 1; + /* Catch UNDEF cases for bad values of align field */ + switch (op & 0xc) { + case 4: + if (((insn >> 5) & 1) == 1) { + return 1; + } + break; + case 8: + if (((insn >> 4) & 3) == 3) { + return 1; + } + break; + default: + break; + } nregs = neon_ls_element_type[op].nregs; interleave = neon_ls_element_type[op].interleave; spacing = neon_ls_element_type[op].spacing; @@ -3975,6 +4061,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) stride = (1 << size) * nregs; } else { /* Single element. */ + int idx = (insn >> 4) & 0xf; pass = (insn >> 7) & 1; switch (size) { case 0: @@ -3993,6 +4080,39 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) abort(); } nregs = ((insn >> 8) & 3) + 1; + /* Catch the UNDEF cases. This is unavoidably a bit messy. */ + switch (nregs) { + case 1: + if (((idx & (1 << size)) != 0) || + (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) { + return 1; + } + break; + case 3: + if ((idx & 1) != 0) { + return 1; + } + /* fall through */ + case 2: + if (size == 2 && (idx & 2) != 0) { + return 1; + } + break; + case 4: + if ((size == 2) && ((idx & 3) == 3)) { + return 1; + } + break; + default: + abort(); + } + if ((rd + stride * (nregs - 1)) > 31) { + /* Attempts to write off the end of the register file + * are UNPREDICTABLE; we choose to UNDEF because otherwise + * the neon_load_reg() would write off the end of the array. + */ + return 1; + } addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); for (reg = 0; reg < nregs; reg++) { @@ -4077,9 +4197,9 @@ static inline void gen_neon_narrow(int size, TCGv dest, TCGv_i64 src) static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src) { switch (size) { - case 0: gen_helper_neon_narrow_sat_s8(dest, src); break; - case 1: gen_helper_neon_narrow_sat_s16(dest, src); break; - case 2: gen_helper_neon_narrow_sat_s32(dest, src); break; + case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break; + case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break; + case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break; default: abort(); } } @@ -4087,9 +4207,9 @@ static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src) static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src) { switch (size) { - case 0: gen_helper_neon_narrow_sat_u8(dest, src); break; - case 1: gen_helper_neon_narrow_sat_u16(dest, src); break; - case 2: gen_helper_neon_narrow_sat_u32(dest, src); break; + case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break; + case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break; + case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break; default: abort(); } } @@ -4097,9 +4217,9 @@ static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src) static inline void gen_neon_unarrow_sats(int size, TCGv dest, TCGv_i64 src) { switch (size) { - case 0: gen_helper_neon_unarrow_sat8(dest, src); break; - case 1: gen_helper_neon_unarrow_sat16(dest, src); break; - case 2: gen_helper_neon_unarrow_sat32(dest, src); break; + case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break; + case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break; + case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break; default: abort(); } } @@ -4191,8 +4311,8 @@ static inline void gen_neon_negl(TCGv_i64 var, int size) static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size) { switch (size) { - case 1: gen_helper_neon_addl_saturate_s32(op0, op0, op1); break; - case 2: gen_helper_neon_addl_saturate_s64(op0, op0, op1); break; + case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break; + case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break; default: abort(); } } @@ -4312,6 +4432,113 @@ static const uint8_t neon_3r_sizes[] = { [NEON_3R_VRECPS_VRSQRTS] = 0x5, /* size bit 1 encodes op */ }; +/* Symbolic constants for op fields for Neon 2-register miscellaneous. + * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B + * table A7-13. + */ +#define NEON_2RM_VREV64 0 +#define NEON_2RM_VREV32 1 +#define NEON_2RM_VREV16 2 +#define NEON_2RM_VPADDL 4 +#define NEON_2RM_VPADDL_U 5 +#define NEON_2RM_VCLS 8 +#define NEON_2RM_VCLZ 9 +#define NEON_2RM_VCNT 10 +#define NEON_2RM_VMVN 11 +#define NEON_2RM_VPADAL 12 +#define NEON_2RM_VPADAL_U 13 +#define NEON_2RM_VQABS 14 +#define NEON_2RM_VQNEG 15 +#define NEON_2RM_VCGT0 16 +#define NEON_2RM_VCGE0 17 +#define NEON_2RM_VCEQ0 18 +#define NEON_2RM_VCLE0 19 +#define NEON_2RM_VCLT0 20 +#define NEON_2RM_VABS 22 +#define NEON_2RM_VNEG 23 +#define NEON_2RM_VCGT0_F 24 +#define NEON_2RM_VCGE0_F 25 +#define NEON_2RM_VCEQ0_F 26 +#define NEON_2RM_VCLE0_F 27 +#define NEON_2RM_VCLT0_F 28 +#define NEON_2RM_VABS_F 30 +#define NEON_2RM_VNEG_F 31 +#define NEON_2RM_VSWP 32 +#define NEON_2RM_VTRN 33 +#define NEON_2RM_VUZP 34 +#define NEON_2RM_VZIP 35 +#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */ +#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */ +#define NEON_2RM_VSHLL 38 +#define NEON_2RM_VCVT_F16_F32 44 +#define NEON_2RM_VCVT_F32_F16 46 +#define NEON_2RM_VRECPE 56 +#define NEON_2RM_VRSQRTE 57 +#define NEON_2RM_VRECPE_F 58 +#define NEON_2RM_VRSQRTE_F 59 +#define NEON_2RM_VCVT_FS 60 +#define NEON_2RM_VCVT_FU 61 +#define NEON_2RM_VCVT_SF 62 +#define NEON_2RM_VCVT_UF 63 + +static int neon_2rm_is_float_op(int op) +{ + /* Return true if this neon 2reg-misc op is float-to-float */ + return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F || + op >= NEON_2RM_VRECPE_F); +} + +/* Each entry in this array has bit n set if the insn allows + * size value n (otherwise it will UNDEF). Since unallocated + * op values will have no bits set they always UNDEF. + */ +static const uint8_t neon_2rm_sizes[] = { + [NEON_2RM_VREV64] = 0x7, + [NEON_2RM_VREV32] = 0x3, + [NEON_2RM_VREV16] = 0x1, + [NEON_2RM_VPADDL] = 0x7, + [NEON_2RM_VPADDL_U] = 0x7, + [NEON_2RM_VCLS] = 0x7, + [NEON_2RM_VCLZ] = 0x7, + [NEON_2RM_VCNT] = 0x1, + [NEON_2RM_VMVN] = 0x1, + [NEON_2RM_VPADAL] = 0x7, + [NEON_2RM_VPADAL_U] = 0x7, + [NEON_2RM_VQABS] = 0x7, + [NEON_2RM_VQNEG] = 0x7, + [NEON_2RM_VCGT0] = 0x7, + [NEON_2RM_VCGE0] = 0x7, + [NEON_2RM_VCEQ0] = 0x7, + [NEON_2RM_VCLE0] = 0x7, + [NEON_2RM_VCLT0] = 0x7, + [NEON_2RM_VABS] = 0x7, + [NEON_2RM_VNEG] = 0x7, + [NEON_2RM_VCGT0_F] = 0x4, + [NEON_2RM_VCGE0_F] = 0x4, + [NEON_2RM_VCEQ0_F] = 0x4, + [NEON_2RM_VCLE0_F] = 0x4, + [NEON_2RM_VCLT0_F] = 0x4, + [NEON_2RM_VABS_F] = 0x4, + [NEON_2RM_VNEG_F] = 0x4, + [NEON_2RM_VSWP] = 0x1, + [NEON_2RM_VTRN] = 0x7, + [NEON_2RM_VUZP] = 0x7, + [NEON_2RM_VZIP] = 0x7, + [NEON_2RM_VMOVN] = 0x7, + [NEON_2RM_VQMOVN] = 0x7, + [NEON_2RM_VSHLL] = 0x7, + [NEON_2RM_VCVT_F16_F32] = 0x2, + [NEON_2RM_VCVT_F32_F16] = 0x2, + [NEON_2RM_VRECPE] = 0x4, + [NEON_2RM_VRSQRTE] = 0x4, + [NEON_2RM_VRECPE_F] = 0x4, + [NEON_2RM_VRSQRTE_F] = 0x4, + [NEON_2RM_VCVT_FS] = 0x4, + [NEON_2RM_VCVT_FU] = 0x4, + [NEON_2RM_VCVT_SF] = 0x4, + [NEON_2RM_VCVT_UF] = 0x4, +}; + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -4328,7 +4555,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) int count; int pairwise; int u; - int n; uint32_t imm, mask; TCGv tmp, tmp2, tmp3, tmp4, tmp5; TCGv_i64 tmp64; @@ -4362,16 +4588,20 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) switch (op) { case NEON_3R_VQADD: if (u) { - gen_helper_neon_qadd_u64(cpu_V0, cpu_V0, cpu_V1); + gen_helper_neon_qadd_u64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } else { - gen_helper_neon_qadd_s64(cpu_V0, cpu_V0, cpu_V1); + gen_helper_neon_qadd_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } break; case NEON_3R_VQSUB: if (u) { - gen_helper_neon_qsub_u64(cpu_V0, cpu_V0, cpu_V1); + gen_helper_neon_qsub_u64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } else { - gen_helper_neon_qsub_s64(cpu_V0, cpu_V0, cpu_V1); + gen_helper_neon_qsub_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } break; case NEON_3R_VSHL: @@ -4383,9 +4613,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) break; case NEON_3R_VQSHL: if (u) { - gen_helper_neon_qshl_u64(cpu_V0, cpu_V1, cpu_V0); + gen_helper_neon_qshl_u64(cpu_V0, cpu_env, + cpu_V1, cpu_V0); } else { - gen_helper_neon_qshl_s64(cpu_V0, cpu_V1, cpu_V0); + gen_helper_neon_qshl_s64(cpu_V0, cpu_env, + cpu_V1, cpu_V0); } break; case NEON_3R_VRSHL: @@ -4397,9 +4629,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) break; case NEON_3R_VQRSHL: if (u) { - gen_helper_neon_qrshl_u64(cpu_V0, cpu_V1, cpu_V0); + gen_helper_neon_qrshl_u64(cpu_V0, cpu_env, + cpu_V1, cpu_V0); } else { - gen_helper_neon_qrshl_s64(cpu_V0, cpu_V1, cpu_V0); + gen_helper_neon_qrshl_s64(cpu_V0, cpu_env, + cpu_V1, cpu_V0); } break; case NEON_3R_VADD_VSUB: @@ -4480,16 +4714,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (pairwise) { /* Pairwise. */ - if (q) - n = (pass & 1) * 2; - else - n = 0; - if (pass < q + 1) { - tmp = neon_load_reg(rn, n); - tmp2 = neon_load_reg(rn, n + 1); + if (pass < 1) { + tmp = neon_load_reg(rn, 0); + tmp2 = neon_load_reg(rn, 1); } else { - tmp = neon_load_reg(rm, n); - tmp2 = neon_load_reg(rm, n + 1); + tmp = neon_load_reg(rm, 0); + tmp2 = neon_load_reg(rm, 1); } } else { /* Elementwise. */ @@ -4501,7 +4731,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) GEN_NEON_INTEGER_OP(hadd); break; case NEON_3R_VQADD: - GEN_NEON_INTEGER_OP(qadd); + GEN_NEON_INTEGER_OP_ENV(qadd); break; case NEON_3R_VRHADD: GEN_NEON_INTEGER_OP(rhadd); @@ -4544,7 +4774,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) GEN_NEON_INTEGER_OP(hsub); break; case NEON_3R_VQSUB: - GEN_NEON_INTEGER_OP(qsub); + GEN_NEON_INTEGER_OP_ENV(qsub); break; case NEON_3R_VCGT: GEN_NEON_INTEGER_OP(cgt); @@ -4556,13 +4786,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) GEN_NEON_INTEGER_OP(shl); break; case NEON_3R_VQSHL: - GEN_NEON_INTEGER_OP(qshl); + GEN_NEON_INTEGER_OP_ENV(qshl); break; case NEON_3R_VRSHL: GEN_NEON_INTEGER_OP(rshl); break; case NEON_3R_VQRSHL: - GEN_NEON_INTEGER_OP(qrshl); + GEN_NEON_INTEGER_OP_ENV(qrshl); break; case NEON_3R_VMAX: GEN_NEON_INTEGER_OP(max); @@ -4644,14 +4874,22 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */ if (!u) { /* VQDMULH */ switch (size) { - case 1: gen_helper_neon_qdmulh_s16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_qdmulh_s32(tmp, tmp, tmp2); break; + case 1: + gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); + break; + case 2: + gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); + break; default: abort(); } } else { /* VQRDMULH */ switch (size) { - case 1: gen_helper_neon_qrdmulh_s16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_qrdmulh_s32(tmp, tmp, tmp2); break; + case 1: + gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); + break; + case 2: + gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); + break; default: abort(); } } @@ -4665,57 +4903,78 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } break; case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */ + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); switch ((u << 2) | size) { case 0: /* VADD */ - gen_helper_neon_add_f32(tmp, tmp, tmp2); + case 4: /* VPADD */ + gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); break; case 2: /* VSUB */ - gen_helper_neon_sub_f32(tmp, tmp, tmp2); - break; - case 4: /* VPADD */ - gen_helper_neon_add_f32(tmp, tmp, tmp2); + gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus); break; case 6: /* VABD */ - gen_helper_neon_abd_f32(tmp, tmp, tmp2); + gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus); break; default: abort(); } + tcg_temp_free_ptr(fpstatus); break; + } case NEON_3R_FLOAT_MULTIPLY: - gen_helper_neon_mul_f32(tmp, tmp, tmp2); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus); if (!u) { tcg_temp_free_i32(tmp2); tmp2 = neon_load_reg(rd, pass); if (size == 0) { - gen_helper_neon_add_f32(tmp, tmp, tmp2); + gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); } else { - gen_helper_neon_sub_f32(tmp, tmp2, tmp); + gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus); } } + tcg_temp_free_ptr(fpstatus); break; + } case NEON_3R_FLOAT_CMP: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); if (!u) { - gen_helper_neon_ceq_f32(tmp, tmp, tmp2); + gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus); } else { - if (size == 0) - gen_helper_neon_cge_f32(tmp, tmp, tmp2); - else - gen_helper_neon_cgt_f32(tmp, tmp, tmp2); + if (size == 0) { + gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus); + } else { + gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus); + } } + tcg_temp_free_ptr(fpstatus); break; + } case NEON_3R_FLOAT_ACMP: - if (size == 0) - gen_helper_neon_acge_f32(tmp, tmp, tmp2); - else - gen_helper_neon_acgt_f32(tmp, tmp, tmp2); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + if (size == 0) { + gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus); + } else { + gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus); + } + tcg_temp_free_ptr(fpstatus); break; + } case NEON_3R_FLOAT_MINMAX: - if (size == 0) - gen_helper_neon_max_f32(tmp, tmp, tmp2); - else - gen_helper_neon_min_f32(tmp, tmp, tmp2); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + if (size == 0) { + gen_helper_neon_max_f32(tmp, tmp, tmp2, fpstatus); + } else { + gen_helper_neon_min_f32(tmp, tmp, tmp2, fpstatus); + } + tcg_temp_free_ptr(fpstatus); break; + } case NEON_3R_VRECPS_VRSQRTS: if (size == 0) gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); @@ -4749,7 +5008,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Two registers and shift. */ op = (insn >> 8) & 0xf; if (insn & (1 << 7)) { - /* 64-bit shift. */ + /* 64-bit shift. */ + if (op > 7) { + return 1; + } size = 3; } else { size = 2; @@ -4762,6 +5024,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (op < 8) { /* Shift by immediate: VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ + if (q && ((rd | rm) & 1)) { + return 1; + } + if (!u && (op == 4 || op == 6)) { + return 1; + } /* Right shifts are encoded as N - shift, where N is the element size in bits. */ if (op <= 4) @@ -4809,27 +5077,19 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1); break; case 4: /* VSRI */ - if (!u) - return 1; - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - break; case 5: /* VSHL, VSLI */ gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); break; case 6: /* VQSHLU */ - if (u) { - gen_helper_neon_qshlu_s64(cpu_V0, - cpu_V0, cpu_V1); - } else { - return 1; - } + gen_helper_neon_qshlu_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); break; case 7: /* VQSHL */ if (u) { - gen_helper_neon_qshl_u64(cpu_V0, + gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1); } else { - gen_helper_neon_qshl_s64(cpu_V0, + gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1); } break; @@ -4870,38 +5130,34 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) GEN_NEON_INTEGER_OP(rshl); break; case 4: /* VSRI */ - if (!u) - return 1; - GEN_NEON_INTEGER_OP(shl); - break; case 5: /* VSHL, VSLI */ switch (size) { case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } break; case 6: /* VQSHLU */ - if (!u) { - return 1; - } switch (size) { case 0: - gen_helper_neon_qshlu_s8(tmp, tmp, tmp2); + gen_helper_neon_qshlu_s8(tmp, cpu_env, + tmp, tmp2); break; case 1: - gen_helper_neon_qshlu_s16(tmp, tmp, tmp2); + gen_helper_neon_qshlu_s16(tmp, cpu_env, + tmp, tmp2); break; case 2: - gen_helper_neon_qshlu_s32(tmp, tmp, tmp2); + gen_helper_neon_qshlu_s32(tmp, cpu_env, + tmp, tmp2); break; default: - return 1; + abort(); } break; case 7: /* VQSHL */ - GEN_NEON_INTEGER_OP(qshl); + GEN_NEON_INTEGER_OP_ENV(qshl); break; } tcg_temp_free_i32(tmp2); @@ -4955,7 +5211,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Shift by immediate and narrow: VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ int input_unsigned = (op == 8) ? !u : u; - + if (rm & 1) { + return 1; + } shift = shift - (1 << (size + 3)); size++; if (size == 3) { @@ -5023,9 +5281,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp2); } } else if (op == 10) { - /* VSHLL */ - if (q || size == 3) + /* VSHLL, VMOVL */ + if (q || (rd & 1)) { return 1; + } tmp = neon_load_reg(rm, 0); tmp2 = neon_load_reg(rm, 1); for (pass = 0; pass < 2; pass++) { @@ -5066,6 +5325,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } else if (op >= 14) { /* VCVT fixed-point. */ + if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) { + return 1; + } /* We have already masked out the must-be-1 top bit of imm6, * hence this 32-shift where the ARM ARM has 64-imm6. */ @@ -5074,14 +5336,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass)); if (!(op & 1)) { if (u) - gen_vfp_ulto(0, shift); + gen_vfp_ulto(0, shift, 1); else - gen_vfp_slto(0, shift); + gen_vfp_slto(0, shift, 1); } else { if (u) - gen_vfp_toul(0, shift); + gen_vfp_toul(0, shift, 1); else - gen_vfp_tosl(0, shift); + gen_vfp_tosl(0, shift, 1); } tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass)); } @@ -5090,11 +5352,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } else { /* (insn & 0x00380080) == 0 */ int invert; + if (q && (rd & 1)) { + return 1; + } op = (insn >> 8) & 0xf; /* One register and immediate. */ imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf); invert = (insn & (1 << 5)) != 0; + /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. + * We choose to not special-case this and will behave as if a + * valid constant encoding of 0 had been given. + */ switch (op) { case 0: case 1: /* no-op */ @@ -5126,6 +5395,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) imm = ~imm; break; case 15: + if (invert) { + return 1; + } imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); break; @@ -5147,6 +5419,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* VMOV, VMVN. */ tmp = tcg_temp_new_i32(); if (op == 14 && invert) { + int n; uint32_t val; val = 0; for (n = 0; n < 4; n++) { @@ -5169,31 +5442,47 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) int src1_wide; int src2_wide; int prewiden; - /* prewiden, src1_wide, src2_wide */ - static const int neon_3reg_wide[16][3] = { - {1, 0, 0}, /* VADDL */ - {1, 1, 0}, /* VADDW */ - {1, 0, 0}, /* VSUBL */ - {1, 1, 0}, /* VSUBW */ - {0, 1, 1}, /* VADDHN */ - {0, 0, 0}, /* VABAL */ - {0, 1, 1}, /* VSUBHN */ - {0, 0, 0}, /* VABDL */ - {0, 0, 0}, /* VMLAL */ - {0, 0, 0}, /* VQDMLAL */ - {0, 0, 0}, /* VMLSL */ - {0, 0, 0}, /* VQDMLSL */ - {0, 0, 0}, /* Integer VMULL */ - {0, 0, 0}, /* VQDMULL */ - {0, 0, 0} /* Polynomial VMULL */ + /* undefreq: bit 0 : UNDEF if size != 0 + * bit 1 : UNDEF if size == 0 + * bit 2 : UNDEF if U == 1 + * Note that [1:0] set implies 'always UNDEF' + */ + int undefreq; + /* prewiden, src1_wide, src2_wide, undefreq */ + static const int neon_3reg_wide[16][4] = { + {1, 0, 0, 0}, /* VADDL */ + {1, 1, 0, 0}, /* VADDW */ + {1, 0, 0, 0}, /* VSUBL */ + {1, 1, 0, 0}, /* VSUBW */ + {0, 1, 1, 0}, /* VADDHN */ + {0, 0, 0, 0}, /* VABAL */ + {0, 1, 1, 0}, /* VSUBHN */ + {0, 0, 0, 0}, /* VABDL */ + {0, 0, 0, 0}, /* VMLAL */ + {0, 0, 0, 6}, /* VQDMLAL */ + {0, 0, 0, 0}, /* VMLSL */ + {0, 0, 0, 6}, /* VQDMLSL */ + {0, 0, 0, 0}, /* Integer VMULL */ + {0, 0, 0, 2}, /* VQDMULL */ + {0, 0, 0, 5}, /* Polynomial VMULL */ + {0, 0, 0, 3}, /* Reserved: always UNDEF */ }; prewiden = neon_3reg_wide[op][0]; src1_wide = neon_3reg_wide[op][1]; src2_wide = neon_3reg_wide[op][2]; + undefreq = neon_3reg_wide[op][3]; - if (size == 0 && (op == 9 || op == 11 || op == 13)) + if (((undefreq & 1) && (size != 0)) || + ((undefreq & 2) && (size == 0)) || + ((undefreq & 4) && u)) { + return 1; + } + if ((src1_wide && (rn & 1)) || + (src2_wide && (rm & 1)) || + (!src2_wide && (rd & 1))) { return 1; + } /* Avoid overlapping operands. Wide source operands are always aligned so will never overlap with wide @@ -5274,8 +5563,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp2); tcg_temp_free_i32(tmp); break; - default: /* 15 is RESERVED. */ - return 1; + default: /* 15 is RESERVED: caught earlier */ + abort(); } if (op == 13) { /* VQDMULL */ @@ -5347,16 +5636,29 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } } else { - /* Two registers and a scalar. */ + /* Two registers and a scalar. NB that for ops of this form + * the ARM ARM labels bit 24 as Q, but it is in our variable + * 'u', not 'q'. + */ + if (size == 0) { + return 1; + } switch (op) { - case 0: /* Integer VMLA scalar */ case 1: /* Float VMLA scalar */ - case 4: /* Integer VMLS scalar */ case 5: /* Floating point VMLS scalar */ - case 8: /* Integer VMUL scalar */ case 9: /* Floating point VMUL scalar */ + if (size == 1) { + return 1; + } + /* fall through */ + case 0: /* Integer VMLA scalar */ + case 4: /* Integer VMLS scalar */ + case 8: /* Integer VMUL scalar */ case 12: /* VQDMULH scalar */ case 13: /* VQRDMULH scalar */ + if (u && ((rd | rn) & 1)) { + return 1; + } tmp = neon_get_scalar(size, rm); neon_store_scratch(0, tmp); for (pass = 0; pass < (u ? 4 : 2); pass++) { @@ -5364,24 +5666,26 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp2 = neon_load_reg(rn, pass); if (op == 12) { if (size == 1) { - gen_helper_neon_qdmulh_s16(tmp, tmp, tmp2); + gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); } else { - gen_helper_neon_qdmulh_s32(tmp, tmp, tmp2); + gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); } } else if (op == 13) { if (size == 1) { - gen_helper_neon_qrdmulh_s16(tmp, tmp, tmp2); + gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); } else { - gen_helper_neon_qrdmulh_s32(tmp, tmp, tmp2); + gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); } } else if (op & 1) { - gen_helper_neon_mul_f32(tmp, tmp, tmp2); + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus); + tcg_temp_free_ptr(fpstatus); } else { switch (size) { case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } } tcg_temp_free_i32(tmp2); @@ -5393,14 +5697,22 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_neon_add(size, tmp, tmp2); break; case 1: - gen_helper_neon_add_f32(tmp, tmp, tmp2); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); + tcg_temp_free_ptr(fpstatus); break; + } case 4: gen_neon_rsb(size, tmp, tmp2); break; case 5: - gen_helper_neon_sub_f32(tmp, tmp2, tmp); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus); + tcg_temp_free_ptr(fpstatus); break; + } default: abort(); } @@ -5409,15 +5721,19 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) neon_store_reg(rd, pass, tmp); } break; - case 2: /* VMLAL sclar */ case 3: /* VQDMLAL scalar */ - case 6: /* VMLSL scalar */ case 7: /* VQDMLSL scalar */ - case 10: /* VMULL scalar */ case 11: /* VQDMULL scalar */ - if (size == 0 && (op == 3 || op == 7 || op == 11)) + if (u == 1) { return 1; - + } + /* fall through */ + case 2: /* VMLAL sclar */ + case 6: /* VMLSL scalar */ + case 10: /* VMULL scalar */ + if (rd & 1) { + return 1; + } tmp2 = neon_get_scalar(size, rm); /* We need a copy of tmp2 because gen_neon_mull * deletes it during pass 0. */ @@ -5476,6 +5792,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (imm > 7 && !q) return 1; + if (q && ((rd | rn | rm) & 1)) { + return 1; + } + if (imm == 0) { neon_load_reg64(cpu_V0, rn); if (q) { @@ -5524,10 +5844,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Two register misc. */ op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); size = (insn >> 18) & 3; + /* UNDEF for unknown op values and bad op-size combinations */ + if ((neon_2rm_sizes[op] & (1 << size)) == 0) { + return 1; + } + if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) && + q && ((rm | rd) & 1)) { + return 1; + } switch (op) { - case 0: /* VREV64 */ - if (size == 3) - return 1; + case NEON_2RM_VREV64: for (pass = 0; pass < (q ? 2 : 1); pass++) { tmp = neon_load_reg(rm, pass * 2); tmp2 = neon_load_reg(rm, pass * 2 + 1); @@ -5550,10 +5876,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } break; - case 4: case 5: /* VPADDL */ - case 12: case 13: /* VPADAL */ - if (size == 3) - return 1; + case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U: + case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U: for (pass = 0; pass < q + 1; pass++) { tmp = neon_load_reg(rm, pass * 2); gen_neon_widen(cpu_V0, tmp, size, op & 1); @@ -5565,7 +5889,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 2: tcg_gen_add_i64(CPU_V001); break; default: abort(); } - if (op >= 12) { + if (op >= NEON_2RM_VPADAL) { /* Accumulate. */ neon_load_reg64(cpu_V1, rd + pass); gen_neon_addl(size); @@ -5573,8 +5897,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) neon_store_reg64(cpu_V0, rd + pass); } break; - case 33: /* VTRN */ + case NEON_2RM_VTRN: if (size == 2) { + int n; for (n = 0; n < (q ? 4 : 2); n += 2) { tmp = neon_load_reg(rm, n); tmp2 = neon_load_reg(rd, n + 1); @@ -5585,24 +5910,27 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) goto elementwise; } break; - case 34: /* VUZP */ + case NEON_2RM_VUZP: if (gen_neon_unzip(rd, rm, size, q)) { return 1; } break; - case 35: /* VZIP */ + case NEON_2RM_VZIP: if (gen_neon_zip(rd, rm, size, q)) { return 1; } break; - case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */ - if (size == 3) + case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN: + /* also VQMOVUN; op field and mnemonics don't line up */ + if (rm & 1) { return 1; + } TCGV_UNUSED(tmp2); for (pass = 0; pass < 2; pass++) { neon_load_reg64(cpu_V0, rm + pass); tmp = tcg_temp_new_i32(); - gen_neon_narrow_op(op == 36, q, size, tmp, cpu_V0); + gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size, + tmp, cpu_V0); if (pass == 0) { tmp2 = tmp; } else { @@ -5611,9 +5939,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } break; - case 38: /* VSHLL */ - if (q || size == 3) + case NEON_2RM_VSHLL: + if (q || (rd & 1)) { return 1; + } tmp = neon_load_reg(rm, 0); tmp2 = neon_load_reg(rm, 1); for (pass = 0; pass < 2; pass++) { @@ -5624,9 +5953,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) neon_store_reg64(cpu_V0, rd + pass); } break; - case 44: /* VCVT.F16.F32 */ - if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) - return 1; + case NEON_2RM_VCVT_F16_F32: + if (!arm_feature(env, ARM_FEATURE_VFP_FP16) || + q || (rm & 1)) { + return 1; + } tmp = tcg_temp_new_i32(); tmp2 = tcg_temp_new_i32(); tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0)); @@ -5646,9 +5977,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) neon_store_reg(rd, 1, tmp2); tcg_temp_free_i32(tmp); break; - case 46: /* VCVT.F32.F16 */ - if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) - return 1; + case NEON_2RM_VCVT_F32_F16: + if (!arm_feature(env, ARM_FEATURE_VFP_FP16) || + q || (rd & 1)) { + return 1; + } tmp3 = tcg_temp_new_i32(); tmp = neon_load_reg(rm, 0); tmp2 = neon_load_reg(rm, 1); @@ -5671,7 +6004,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) default: elementwise: for (pass = 0; pass < (q ? 4 : 2); pass++) { - if (op == 30 || op == 31 || op >= 58) { + if (neon_2rm_is_float_op(op)) { tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass)); TCGV_UNUSED(tmp); @@ -5679,183 +6012,210 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp = neon_load_reg(rm, pass); } switch (op) { - case 1: /* VREV32 */ + case NEON_2RM_VREV32: switch (size) { case 0: tcg_gen_bswap32_i32(tmp, tmp); break; case 1: gen_swap_half(tmp); break; - default: return 1; + default: abort(); } break; - case 2: /* VREV16 */ - if (size != 0) - return 1; + case NEON_2RM_VREV16: gen_rev16(tmp); break; - case 8: /* CLS */ + case NEON_2RM_VCLS: switch (size) { case 0: gen_helper_neon_cls_s8(tmp, tmp); break; case 1: gen_helper_neon_cls_s16(tmp, tmp); break; case 2: gen_helper_neon_cls_s32(tmp, tmp); break; - default: return 1; + default: abort(); } break; - case 9: /* CLZ */ + case NEON_2RM_VCLZ: switch (size) { case 0: gen_helper_neon_clz_u8(tmp, tmp); break; case 1: gen_helper_neon_clz_u16(tmp, tmp); break; case 2: gen_helper_clz(tmp, tmp); break; - default: return 1; + default: abort(); } break; - case 10: /* CNT */ - if (size != 0) - return 1; + case NEON_2RM_VCNT: gen_helper_neon_cnt_u8(tmp, tmp); break; - case 11: /* VNOT */ - if (size != 0) - return 1; + case NEON_2RM_VMVN: tcg_gen_not_i32(tmp, tmp); break; - case 14: /* VQABS */ + case NEON_2RM_VQABS: switch (size) { - case 0: gen_helper_neon_qabs_s8(tmp, tmp); break; - case 1: gen_helper_neon_qabs_s16(tmp, tmp); break; - case 2: gen_helper_neon_qabs_s32(tmp, tmp); break; - default: return 1; + case 0: + gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); + break; + case 1: + gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); + break; + case 2: + gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); + break; + default: abort(); } break; - case 15: /* VQNEG */ + case NEON_2RM_VQNEG: switch (size) { - case 0: gen_helper_neon_qneg_s8(tmp, tmp); break; - case 1: gen_helper_neon_qneg_s16(tmp, tmp); break; - case 2: gen_helper_neon_qneg_s32(tmp, tmp); break; - default: return 1; + case 0: + gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); + break; + case 1: + gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); + break; + case 2: + gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); + break; + default: abort(); } break; - case 16: case 19: /* VCGT #0, VCLE #0 */ + case NEON_2RM_VCGT0: case NEON_2RM_VCLE0: tmp2 = tcg_const_i32(0); switch(size) { case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } tcg_temp_free(tmp2); - if (op == 19) + if (op == NEON_2RM_VCLE0) { tcg_gen_not_i32(tmp, tmp); + } break; - case 17: case 20: /* VCGE #0, VCLT #0 */ + case NEON_2RM_VCGE0: case NEON_2RM_VCLT0: tmp2 = tcg_const_i32(0); switch(size) { case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } tcg_temp_free(tmp2); - if (op == 20) + if (op == NEON_2RM_VCLT0) { tcg_gen_not_i32(tmp, tmp); + } break; - case 18: /* VCEQ #0 */ + case NEON_2RM_VCEQ0: tmp2 = tcg_const_i32(0); switch(size) { case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } tcg_temp_free(tmp2); break; - case 22: /* VABS */ + case NEON_2RM_VABS: switch(size) { case 0: gen_helper_neon_abs_s8(tmp, tmp); break; case 1: gen_helper_neon_abs_s16(tmp, tmp); break; case 2: tcg_gen_abs_i32(tmp, tmp); break; - default: return 1; + default: abort(); } break; - case 23: /* VNEG */ - if (size == 3) - return 1; + case NEON_2RM_VNEG: tmp2 = tcg_const_i32(0); gen_neon_rsb(size, tmp, tmp2); tcg_temp_free(tmp2); break; - case 24: /* Float VCGT #0 */ + case NEON_2RM_VCGT0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); tmp2 = tcg_const_i32(0); - gen_helper_neon_cgt_f32(tmp, tmp, tmp2); + gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus); tcg_temp_free(tmp2); + tcg_temp_free_ptr(fpstatus); break; - case 25: /* Float VCGE #0 */ + } + case NEON_2RM_VCGE0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); tmp2 = tcg_const_i32(0); - gen_helper_neon_cge_f32(tmp, tmp, tmp2); + gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus); tcg_temp_free(tmp2); + tcg_temp_free_ptr(fpstatus); break; - case 26: /* Float VCEQ #0 */ + } + case NEON_2RM_VCEQ0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); tmp2 = tcg_const_i32(0); - gen_helper_neon_ceq_f32(tmp, tmp, tmp2); + gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus); tcg_temp_free(tmp2); + tcg_temp_free_ptr(fpstatus); break; - case 27: /* Float VCLE #0 */ + } + case NEON_2RM_VCLE0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); tmp2 = tcg_const_i32(0); - gen_helper_neon_cge_f32(tmp, tmp2, tmp); + gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus); tcg_temp_free(tmp2); + tcg_temp_free_ptr(fpstatus); break; - case 28: /* Float VCLT #0 */ + } + case NEON_2RM_VCLT0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); tmp2 = tcg_const_i32(0); - gen_helper_neon_cgt_f32(tmp, tmp2, tmp); + gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus); tcg_temp_free(tmp2); + tcg_temp_free_ptr(fpstatus); break; - case 30: /* Float VABS */ + } + case NEON_2RM_VABS_F: gen_vfp_abs(0); break; - case 31: /* Float VNEG */ + case NEON_2RM_VNEG_F: gen_vfp_neg(0); break; - case 32: /* VSWP */ + case NEON_2RM_VSWP: tmp2 = neon_load_reg(rd, pass); neon_store_reg(rm, pass, tmp2); break; - case 33: /* VTRN */ + case NEON_2RM_VTRN: tmp2 = neon_load_reg(rd, pass); switch (size) { case 0: gen_neon_trn_u8(tmp, tmp2); break; case 1: gen_neon_trn_u16(tmp, tmp2); break; - case 2: abort(); - default: return 1; + default: abort(); } neon_store_reg(rm, pass, tmp2); break; - case 56: /* Integer VRECPE */ + case NEON_2RM_VRECPE: gen_helper_recpe_u32(tmp, tmp, cpu_env); break; - case 57: /* Integer VRSQRTE */ + case NEON_2RM_VRSQRTE: gen_helper_rsqrte_u32(tmp, tmp, cpu_env); break; - case 58: /* Float VRECPE */ + case NEON_2RM_VRECPE_F: gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env); break; - case 59: /* Float VRSQRTE */ + case NEON_2RM_VRSQRTE_F: gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env); break; - case 60: /* VCVT.F32.S32 */ - gen_vfp_sito(0); + case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ + gen_vfp_sito(0, 1); break; - case 61: /* VCVT.F32.U32 */ - gen_vfp_uito(0); + case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */ + gen_vfp_uito(0, 1); break; - case 62: /* VCVT.S32.F32 */ - gen_vfp_tosiz(0); + case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */ + gen_vfp_tosiz(0, 1); break; - case 63: /* VCVT.U32.F32 */ - gen_vfp_touiz(0); + case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */ + gen_vfp_touiz(0, 1); break; default: - /* Reserved: 21, 29, 39-56 */ - return 1; + /* Reserved op values were caught by the + * neon_2rm_sizes[] check earlier. + */ + abort(); } - if (op == 30 || op == 31 || op >= 58) { + if (neon_2rm_is_float_op(op)) { tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass)); } else { @@ -5866,7 +6226,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } else if ((insn & (1 << 10)) == 0) { /* VTBL, VTBX. */ - n = ((insn >> 5) & 0x18) + 8; + int n = ((insn >> 8) & 3) + 1; + if ((rn + n) > 32) { + /* This is UNPREDICTABLE; we choose to UNDEF to avoid the + * helper function running off the end of the register file. + */ + return 1; + } + n <<= 3; if (insn & (1 << 6)) { tmp = neon_load_reg(rd, 0); } else { @@ -5893,6 +6260,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp); } else if ((insn & 0x380) == 0) { /* VDUP */ + if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { + return 1; + } if (insn & (1 << 19)) { tmp = neon_load_reg(rm, 1); } else { @@ -7132,7 +7502,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } else if ((insn & 0x000003e0) == 0x00000060) { tmp = load_reg(s, rm); shift = (insn >> 10) & 3; - /* ??? In many cases it's not neccessary to do a + /* ??? In many cases it's not necessary to do a rotate, a shift is sufficient. */ if (shift != 0) tcg_gen_rotri_i32(tmp, tmp, shift * 8); @@ -7800,7 +8170,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) } } } else { - int i; + int i, loaded_base = 0; + TCGv loaded_var; /* Load/store multiple. */ addr = load_reg(s, rn); offset = 0; @@ -7812,6 +8183,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tcg_gen_addi_i32(addr, addr, -offset); } + TCGV_UNUSED(loaded_var); for (i = 0; i < 16; i++) { if ((insn & (1 << i)) == 0) continue; @@ -7820,6 +8192,9 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tmp = gen_ld32(addr, IS_USER(s)); if (i == 15) { gen_bx(s, tmp); + } else if (i == rn) { + loaded_var = tmp; + loaded_base = 1; } else { store_reg(s, i, tmp); } @@ -7830,6 +8205,9 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) } tcg_gen_addi_i32(addr, addr, 4); } + if (loaded_base) { + store_reg(s, rn, loaded_var); + } if (insn & (1 << 21)) { /* Base register writeback. */ if (insn & (1 << 24)) { @@ -7915,7 +8293,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) case 1: /* Sign/zero extend. */ tmp = load_reg(s, rm); shift = (insn >> 4) & 3; - /* ??? In many cases it's not neccessary to do a + /* ??? In many cases it's not necessary to do a rotate, a shift is sufficient. */ if (shift != 0) tcg_gen_rotri_i32(tmp, tmp, shift * 8); @@ -9230,7 +9608,10 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) break; case 12: + { /* load/store multiple */ + TCGv loaded_var; + TCGV_UNUSED(loaded_var); rn = (insn >> 8) & 0x7; addr = load_reg(s, rn); for (i = 0; i < 8; i++) { @@ -9238,7 +9619,11 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) if (insn & (1 << 11)) { /* load */ tmp = gen_ld32(addr, IS_USER(s)); - store_reg(s, i, tmp); + if (i == rn) { + loaded_var = tmp; + } else { + store_reg(s, i, tmp); + } } else { /* store */ tmp = load_reg(s, i); @@ -9248,14 +9633,18 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tcg_gen_addi_i32(addr, addr, 4); } } - /* Base register writeback. */ if ((insn & (1 << rn)) == 0) { + /* base reg not in list: base register writeback */ store_reg(s, rn, addr); } else { + /* base reg in list: if load, complete it now */ + if (insn & (1 << 11)) { + store_reg(s, rn, loaded_var); + } tcg_temp_free_i32(addr); } break; - + } case 13: /* conditional branch or swi */ cond = (insn >> 8) & 0xf; @@ -9384,8 +9773,8 @@ static inline void gen_intermediate_code_internal(CPUState *env, * This is handled in the same way as restoration of the * PC in these situations: we will be called again with search_pc=1 * and generate a mapping of the condexec bits for each PC in - * gen_opc_condexec_bits[]. gen_pc_load[] then uses this to restore - * the condexec bits. + * gen_opc_condexec_bits[]. restore_state_to_opc() then uses + * this to restore the condexec bits. * * Note that there are no instructions which can read the condexec * bits, and none which can write non-static values to them, so @@ -9650,8 +10039,7 @@ void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, #endif } -void gen_pc_load(CPUState *env, TranslationBlock *tb, - unsigned long searched_pc, int pc_pos, void *puc) +void restore_state_to_opc(CPUState *env, TranslationBlock *tb, int pc_pos) { env->regs[15] = gen_opc_pc[pc_pos]; env->condexec_bits = gen_opc_condexec_bits[pc_pos];