X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=target-arm%2Ftranslate.c;h=c7961b809711100588bb23cbeea1cae48a138bae;hb=4ec648dd6ed97182d123e0df5bc5012c734aa858;hp=d95133f725f40f3ed551d0de35b6c9a7b1996362;hpb=9363ee31ab53fc0fd39fbe5936d9c00a2f4e54a4;p=qemu.git diff --git a/target-arm/translate.c b/target-arm/translate.c index d95133f72..c7961b809 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -25,15 +25,18 @@ #include #include "cpu.h" -#include "exec-all.h" #include "disas.h" #include "tcg-op.h" #include "qemu-log.h" -#include "helpers.h" +#include "helper.h" #define GEN_HELPER 1 -#include "helpers.h" +#include "helper.h" +#define ENABLE_ARCH_4T arm_feature(env, ARM_FEATURE_V4T) +#define ENABLE_ARCH_5 arm_feature(env, ARM_FEATURE_V5) +/* currently all emulated v5 cores are also v5TE, so don't bother */ +#define ENABLE_ARCH_5TE arm_feature(env, ARM_FEATURE_V5) #define ENABLE_ARCH_5J 0 #define ENABLE_ARCH_6 arm_feature(env, ARM_FEATURE_V6) #define ENABLE_ARCH_6K arm_feature(env, ARM_FEATURE_V6K) @@ -125,28 +128,12 @@ void arm_translate_init(void) #endif #define GEN_HELPER 2 -#include "helpers.h" -} - -static int num_temps; - -/* Allocate a temporary variable. */ -static TCGv_i32 new_tmp(void) -{ - num_temps++; - return tcg_temp_new_i32(); -} - -/* Release a temporary variable. */ -static void dead_tmp(TCGv tmp) -{ - tcg_temp_free(tmp); - num_temps--; +#include "helper.h" } static inline TCGv load_cpu_offset(int offset) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_ld_i32(tmp, cpu_env, offset); return tmp; } @@ -156,7 +143,7 @@ static inline TCGv load_cpu_offset(int offset) static inline void store_cpu_offset(TCGv var, int offset) { tcg_gen_st_i32(var, cpu_env, offset); - dead_tmp(var); + tcg_temp_free_i32(var); } #define store_cpu_field(var, name) \ @@ -181,7 +168,7 @@ static void load_reg_var(DisasContext *s, TCGv var, int reg) /* Create a new temporary and set it to the value of a CPU register. */ static inline TCGv load_reg(DisasContext *s, int reg) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); load_reg_var(s, tmp, reg); return tmp; } @@ -195,7 +182,7 @@ static void store_reg(DisasContext *s, int reg, TCGv var) s->is_jmp = DISAS_JUMP; } tcg_gen_mov_i32(cpu_R[reg], var); - dead_tmp(var); + tcg_temp_free_i32(var); } /* Value extensions. */ @@ -219,37 +206,37 @@ static inline void gen_set_cpsr(TCGv var, uint32_t mask) static void gen_exception(int excp) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, excp); gen_helper_exception(tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } static void gen_smul_dual(TCGv a, TCGv b) { - TCGv tmp1 = new_tmp(); - TCGv tmp2 = new_tmp(); + TCGv tmp1 = tcg_temp_new_i32(); + TCGv tmp2 = tcg_temp_new_i32(); tcg_gen_ext16s_i32(tmp1, a); tcg_gen_ext16s_i32(tmp2, b); tcg_gen_mul_i32(tmp1, tmp1, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_sari_i32(a, a, 16); tcg_gen_sari_i32(b, b, 16); tcg_gen_mul_i32(b, b, a); tcg_gen_mov_i32(a, tmp1); - dead_tmp(tmp1); + tcg_temp_free_i32(tmp1); } /* Byteswap each halfword. */ static void gen_rev16(TCGv var) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_shri_i32(tmp, var, 8); tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff); tcg_gen_shli_i32(var, var, 8); tcg_gen_andi_i32(var, var, 0xff00ff00); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* Byteswap low halfword and sign extend. 
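   (This implements the REVSH operation: the two low bytes are swapped
   and the result is sign-extended from bit 15 into the upper half.)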
*/ @@ -298,7 +285,7 @@ static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv b) TCGv_i64 tmp64 = tcg_temp_new_i64(); tcg_gen_extu_i32_i64(tmp64, b); - dead_tmp(b); + tcg_temp_free_i32(b); tcg_gen_shli_i64(tmp64, tmp64, 32); tcg_gen_add_i64(a, tmp64, a); @@ -312,7 +299,7 @@ static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv b) TCGv_i64 tmp64 = tcg_temp_new_i64(); tcg_gen_extu_i32_i64(tmp64, b); - dead_tmp(b); + tcg_temp_free_i32(b); tcg_gen_shli_i64(tmp64, tmp64, 32); tcg_gen_sub_i64(a, tmp64, a); @@ -329,9 +316,9 @@ static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b) TCGv_i64 tmp2 = tcg_temp_new_i64(); tcg_gen_extu_i32_i64(tmp1, a); - dead_tmp(a); + tcg_temp_free_i32(a); tcg_gen_extu_i32_i64(tmp2, b); - dead_tmp(b); + tcg_temp_free_i32(b); tcg_gen_mul_i64(tmp1, tmp1, tmp2); tcg_temp_free_i64(tmp2); return tmp1; @@ -343,9 +330,9 @@ static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b) TCGv_i64 tmp2 = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(tmp1, a); - dead_tmp(a); + tcg_temp_free_i32(a); tcg_gen_ext_i32_i64(tmp2, b); - dead_tmp(b); + tcg_temp_free_i32(b); tcg_gen_mul_i64(tmp1, tmp1, tmp2); tcg_temp_free_i64(tmp2); return tmp1; @@ -354,11 +341,11 @@ static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b) /* Swap low and high halfwords. */ static void gen_swap_half(TCGv var) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_shri_i32(tmp, var, 16); tcg_gen_shli_i32(var, var, 16); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead. @@ -370,15 +357,15 @@ static void gen_swap_half(TCGv var) static void gen_add16(TCGv t0, TCGv t1) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_xor_i32(tmp, t0, t1); tcg_gen_andi_i32(tmp, tmp, 0x8000); tcg_gen_andi_i32(t0, t0, ~0x8000); tcg_gen_andi_i32(t1, t1, ~0x8000); tcg_gen_add_i32(t0, t0, t1); tcg_gen_xor_i32(t0, t0, tmp); - dead_tmp(tmp); - dead_tmp(t1); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(t1); } #define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, CF)) @@ -386,10 +373,10 @@ static void gen_add16(TCGv t0, TCGv t1) /* Set CF to the top bit of var. */ static void gen_set_CF_bit31(TCGv var) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_shri_i32(tmp, var, 31); gen_set_CF(tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* Set N and Z flags from var. */ @@ -406,7 +393,7 @@ static void gen_adc(TCGv t0, TCGv t1) tcg_gen_add_i32(t0, t0, t1); tmp = load_cpu_field(CF); tcg_gen_add_i32(t0, t0, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* dest = T0 + T1 + CF. */ @@ -416,7 +403,7 @@ static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1) tcg_gen_add_i32(dest, t0, t1); tmp = load_cpu_field(CF); tcg_gen_add_i32(dest, dest, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* dest = T0 - T1 + CF - 1. */ @@ -427,7 +414,7 @@ static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1) tmp = load_cpu_field(CF); tcg_gen_add_i32(dest, dest, tmp); tcg_gen_subi_i32(dest, dest, 1); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* FIXME: Implement this natively. */ @@ -435,7 +422,7 @@ static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1) static void shifter_out_im(TCGv var, int shift) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); if (shift == 0) { tcg_gen_andi_i32(tmp, var, 1); } else { @@ -444,7 +431,7 @@ static void shifter_out_im(TCGv var, int shift) tcg_gen_andi_i32(tmp, tmp, 1); } gen_set_CF(tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* Shift by immediate. Includes special handling for shift == 0. 
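   An immediate shift field of zero is special-cased per the ARM shifter
   rules: LSR #0 and ASR #0 encode shifts by 32, and ROR #0 encodes RRX
   (rotate right by one through the carry flag).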
*/ @@ -492,7 +479,7 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags) tcg_gen_shri_i32(var, var, 1); tcg_gen_shli_i32(tmp, tmp, 31); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } } }; @@ -516,7 +503,7 @@ static inline void gen_arm_shift_reg(TCGv var, int shiftop, tcg_gen_rotr_i32(var, var, shift); break; } } - dead_tmp(shift); + tcg_temp_free_i32(shift); } #define PAS_OP(pfx) \ @@ -655,7 +642,7 @@ static void gen_test_cc(int cc, int label) inv = gen_new_label(); tmp = load_cpu_field(CF); tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = load_cpu_field(ZF); tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label); gen_set_label(inv); @@ -663,7 +650,7 @@ static void gen_test_cc(int cc, int label) case 9: /* ls: !C || Z */ tmp = load_cpu_field(CF); tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = load_cpu_field(ZF); tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label); break; @@ -671,43 +658,43 @@ static void gen_test_cc(int cc, int label) tmp = load_cpu_field(VF); tmp2 = load_cpu_field(NF); tcg_gen_xor_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label); break; case 11: /* lt: N != V -> N ^ V != 0 */ tmp = load_cpu_field(VF); tmp2 = load_cpu_field(NF); tcg_gen_xor_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label); break; case 12: /* gt: !Z && N == V */ inv = gen_new_label(); tmp = load_cpu_field(ZF); tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = load_cpu_field(VF); tmp2 = load_cpu_field(NF); tcg_gen_xor_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label); gen_set_label(inv); break; case 13: /* le: Z || N != V */ tmp = load_cpu_field(ZF); tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = load_cpu_field(VF); tmp2 = load_cpu_field(NF); tcg_gen_xor_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label); break; default: fprintf(stderr, "Bad condition code 0x%x\n", cc); abort(); } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } static const uint8_t table_logic_cc[16] = { @@ -736,10 +723,10 @@ static inline void gen_bx_im(DisasContext *s, uint32_t addr) s->is_jmp = DISAS_UPDATE; if (s->thumb != (addr & 1)) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, addr & 1); tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, thumb)); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } tcg_gen_movi_i32(cpu_R[15], addr & ~1); } @@ -766,33 +753,47 @@ static inline void store_reg_bx(CPUState *env, DisasContext *s, } } +/* Variant of store_reg which uses branch&exchange logic when storing + * to r15 in ARM architecture v5T and above. This is used for storing + * the results of a LDR/LDM/POP into r15, and corresponds to the cases + * in the ARM ARM which use the LoadWritePC() pseudocode function. 
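+ * On v5T and later this behaves like a BX: bit 0 of the loaded value
+ * selects the Thumb state. On earlier architectures the value is
+ * written to r15 directly.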
*/ +static inline void store_reg_from_load(CPUState *env, DisasContext *s, + int reg, TCGv var) +{ + if (reg == 15 && ENABLE_ARCH_5) { + gen_bx(s, var); + } else { + store_reg(s, reg, var); + } +} + static inline TCGv gen_ld8s(TCGv addr, int index) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld8s(tmp, addr, index); return tmp; } static inline TCGv gen_ld8u(TCGv addr, int index) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld8u(tmp, addr, index); return tmp; } static inline TCGv gen_ld16s(TCGv addr, int index) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld16s(tmp, addr, index); return tmp; } static inline TCGv gen_ld16u(TCGv addr, int index) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld16u(tmp, addr, index); return tmp; } static inline TCGv gen_ld32(TCGv addr, int index) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld32u(tmp, addr, index); return tmp; } @@ -805,17 +806,17 @@ static inline TCGv_i64 gen_ld64(TCGv addr, int index) static inline void gen_st8(TCGv val, TCGv addr, int index) { tcg_gen_qemu_st8(val, addr, index); - dead_tmp(val); + tcg_temp_free_i32(val); } static inline void gen_st16(TCGv val, TCGv addr, int index) { tcg_gen_qemu_st16(val, addr, index); - dead_tmp(val); + tcg_temp_free_i32(val); } static inline void gen_st32(TCGv val, TCGv addr, int index) { tcg_gen_qemu_st32(val, addr, index); - dead_tmp(val); + tcg_temp_free_i32(val); } static inline void gen_st64(TCGv_i64 val, TCGv addr, int index) { @@ -859,7 +860,7 @@ static inline void gen_add_data_offset(DisasContext *s, unsigned int insn, tcg_gen_sub_i32(var, var, offset); else tcg_gen_add_i32(var, var, offset); - dead_tmp(offset); + tcg_temp_free_i32(offset); } } @@ -887,17 +888,33 @@ static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn, tcg_gen_sub_i32(var, var, offset); else tcg_gen_add_i32(var, var, offset); - dead_tmp(offset); + tcg_temp_free_i32(offset); + } +} + +static TCGv_ptr get_fpstatus_ptr(int neon) +{ + TCGv_ptr statusptr = tcg_temp_new_ptr(); + int offset; + if (neon) { + offset = offsetof(CPUState, vfp.standard_fp_status); + } else { + offset = offsetof(CPUState, vfp.fp_status); } + tcg_gen_addi_ptr(statusptr, cpu_env, offset); + return statusptr; } #define VFP_OP2(name) \ static inline void gen_vfp_##name(int dp) \ { \ - if (dp) \ - gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, cpu_env); \ - else \ - gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, cpu_env); \ + TCGv_ptr fpst = get_fpstatus_ptr(0); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst); \ + } \ + tcg_temp_free_ptr(fpst); \ } VFP_OP2(add) @@ -907,6 +924,28 @@ VFP_OP2(div) #undef VFP_OP2 +static inline void gen_vfp_F1_mul(int dp) +{ + /* Like gen_vfp_mul() but put result in F1 */ + TCGv_ptr fpst = get_fpstatus_ptr(0); + if (dp) { + gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst); + } else { + gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst); + } + tcg_temp_free_ptr(fpst); +} + +static inline void gen_vfp_F1_neg(int dp) +{ + /* Like gen_vfp_neg() but put result in F1 */ + if (dp) { + gen_helper_vfp_negd(cpu_F1d, cpu_F0d); + } else { + gen_helper_vfp_negs(cpu_F1s, cpu_F0s); + } +} + static inline void gen_vfp_abs(int dp) { if (dp) @@ -955,63 +994,52 @@ static inline void gen_vfp_F1_ld0(int dp) tcg_gen_movi_i32(cpu_F1s, 0); } -static inline void gen_vfp_uito(int dp) -{ - 
if (dp) - gen_helper_vfp_uitod(cpu_F0d, cpu_F0s, cpu_env); - else - gen_helper_vfp_uitos(cpu_F0s, cpu_F0s, cpu_env); -} - -static inline void gen_vfp_sito(int dp) -{ - if (dp) - gen_helper_vfp_sitod(cpu_F0d, cpu_F0s, cpu_env); - else - gen_helper_vfp_sitos(cpu_F0s, cpu_F0s, cpu_env); +#define VFP_GEN_ITOF(name) \ +static inline void gen_vfp_##name(int dp, int neon) \ +{ \ + TCGv_ptr statusptr = get_fpstatus_ptr(neon); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \ + } \ + tcg_temp_free_ptr(statusptr); \ } -static inline void gen_vfp_toui(int dp) -{ - if (dp) - gen_helper_vfp_touid(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_touis(cpu_F0s, cpu_F0s, cpu_env); -} +VFP_GEN_ITOF(uito) +VFP_GEN_ITOF(sito) +#undef VFP_GEN_ITOF -static inline void gen_vfp_touiz(int dp) -{ - if (dp) - gen_helper_vfp_touizd(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_touizs(cpu_F0s, cpu_F0s, cpu_env); -} - -static inline void gen_vfp_tosi(int dp) -{ - if (dp) - gen_helper_vfp_tosid(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_tosis(cpu_F0s, cpu_F0s, cpu_env); +#define VFP_GEN_FTOI(name) \ +static inline void gen_vfp_##name(int dp, int neon) \ +{ \ + TCGv_ptr statusptr = get_fpstatus_ptr(neon); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \ + } \ + tcg_temp_free_ptr(statusptr); \ } -static inline void gen_vfp_tosiz(int dp) -{ - if (dp) - gen_helper_vfp_tosizd(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_tosizs(cpu_F0s, cpu_F0s, cpu_env); -} +VFP_GEN_FTOI(toui) +VFP_GEN_FTOI(touiz) +VFP_GEN_FTOI(tosi) +VFP_GEN_FTOI(tosiz) +#undef VFP_GEN_FTOI #define VFP_GEN_FIX(name) \ -static inline void gen_vfp_##name(int dp, int shift) \ +static inline void gen_vfp_##name(int dp, int shift, int neon) \ { \ TCGv tmp_shift = tcg_const_i32(shift); \ - if (dp) \ - gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, cpu_env);\ - else \ - gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, cpu_env);\ + TCGv_ptr statusptr = get_fpstatus_ptr(neon); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, statusptr); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, statusptr); \ + } \ tcg_temp_free_i32(tmp_shift); \ + tcg_temp_free_ptr(statusptr); \ } VFP_GEN_FIX(tosh) VFP_GEN_FIX(tosl) @@ -1065,7 +1093,7 @@ neon_reg_offset (int reg, int n) static TCGv neon_load_reg(int reg, int pass) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass)); return tmp; } @@ -1073,7 +1101,7 @@ static TCGv neon_load_reg(int reg, int pass) static void neon_store_reg(int reg, int pass, TCGv var) { tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass)); - dead_tmp(var); + tcg_temp_free_i32(var); } static inline void neon_load_reg64(TCGv_i64 var, int reg) @@ -1129,7 +1157,7 @@ static inline void iwmmxt_store_reg(TCGv_i64 var, int reg) static inline TCGv iwmmxt_load_creg(int reg) { - TCGv var = new_tmp(); + TCGv var = tcg_temp_new_i32(); tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, iwmmxt.cregs[reg])); return var; } @@ -1137,7 +1165,7 @@ static inline TCGv iwmmxt_load_creg(int reg) static inline void iwmmxt_store_creg(int reg, TCGv var) { tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, iwmmxt.cregs[reg])); - dead_tmp(var); + tcg_temp_free_i32(var); } static inline void gen_op_iwmmxt_movq_wRn_M0(int rn) @@ -1268,7 +1296,7 @@ static 
void gen_op_iwmmxt_set_cup(void) static void gen_op_iwmmxt_setpsr_nz(void) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0); store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]); } @@ -1300,7 +1328,7 @@ static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, TCGv dest) if (insn & (1 << 21)) store_reg(s, rd, tmp); else - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else if (insn & (1 << 21)) { /* Post indexed */ tcg_gen_mov_i32(dest, tmp); @@ -1326,17 +1354,17 @@ static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv dest) tmp = iwmmxt_load_creg(rd); } } else { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); iwmmxt_load_reg(cpu_V0, rd); tcg_gen_trunc_i64_i32(tmp, cpu_V0); } tcg_gen_andi_i32(tmp, tmp, mask); tcg_gen_mov_i32(dest, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); return 0; } -/* Disassemble an iwMMXt instruction. Returns nonzero if an error occured +/* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred (ie. an undefined instruction). */ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) { @@ -1364,14 +1392,14 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) } wrd = (insn >> 12) & 0xf; - addr = new_tmp(); + addr = tcg_temp_new_i32(); if (gen_iwmmxt_address(s, insn, addr)) { - dead_tmp(addr); + tcg_temp_free_i32(addr); return 1; } if (insn & ARM_CP_RW_BIT) { if ((insn >> 28) == 0xf) { /* WLDRW wCx */ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s)); iwmmxt_store_creg(wrd, tmp); } else { @@ -1392,7 +1420,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) } if (i) { tcg_gen_extu_i32_i64(cpu_M0, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } gen_op_iwmmxt_movq_wRn_M0(wrd); } @@ -1402,10 +1430,10 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) gen_st32(tmp, addr, IS_USER(s)); } else { gen_op_iwmmxt_movq_M0_wRn(wrd); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); if (insn & (1 << 8)) { if (insn & (1 << 22)) { /* WSTRD */ - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tcg_gen_qemu_st64(cpu_M0, addr, IS_USER(s)); } else { /* WSTRW wRd */ tcg_gen_trunc_i64_i32(tmp, cpu_M0); @@ -1422,7 +1450,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) } } } - dead_tmp(addr); + tcg_temp_free_i32(addr); return 0; } @@ -1457,7 +1485,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) tmp = iwmmxt_load_creg(wrd); tmp2 = load_reg(s, rd); tcg_gen_andc_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); iwmmxt_store_creg(wrd, tmp); break; case ARM_IWMMXT_wCGR0: @@ -1670,7 +1698,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) tcg_gen_andi_i32(tmp, tmp, 7); iwmmxt_load_reg(cpu_V1, rd1); gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; @@ -1701,7 +1729,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3); tcg_temp_free(tmp3); tcg_temp_free(tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; @@ -1711,7 +1739,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) if (rd == 15 || ((insn >> 22) & 3) == 3) return 1; gen_op_iwmmxt_movq_M0_wRn(wrd); - tmp = new_tmp(); + tmp = 
tcg_temp_new_i32(); switch ((insn >> 22) & 3) { case 0: tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3); @@ -1755,7 +1783,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) } tcg_gen_shli_i32(tmp, tmp, 28); gen_set_nzcv(tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */ if (((insn >> 6) & 3) == 3) @@ -1774,7 +1802,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) gen_helper_iwmmxt_bcstl(cpu_M0, tmp); break; } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; @@ -1782,7 +1810,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3) return 1; tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_mov_i32(tmp2, tmp); switch ((insn >> 22) & 3) { case 0: @@ -1803,8 +1831,8 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) break; } gen_set_nzcv(tmp); - dead_tmp(tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); break; case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */ wrd = (insn >> 12) & 0xf; @@ -1830,7 +1858,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3) return 1; tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_mov_i32(tmp2, tmp); switch ((insn >> 22) & 3) { case 0: @@ -1851,8 +1879,8 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) break; } gen_set_nzcv(tmp); - dead_tmp(tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); break; case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */ rd = (insn >> 12) & 0xf; @@ -1860,7 +1888,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3) return 1; gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); switch ((insn >> 22) & 3) { case 0: gen_helper_iwmmxt_msbb(tmp, cpu_M0); @@ -1975,9 +2003,9 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) wrd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - dead_tmp(tmp); + tcg_temp_free_i32(tmp); return 1; } switch ((insn >> 22) & 3) { @@ -1991,7 +2019,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp); break; } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); @@ -2003,9 +2031,9 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) wrd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - dead_tmp(tmp); + tcg_temp_free_i32(tmp); return 1; } switch ((insn >> 22) & 3) { @@ -2019,7 +2047,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp); break; } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); @@ -2031,9 +2059,9 @@ static int 
disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) wrd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - dead_tmp(tmp); + tcg_temp_free_i32(tmp); return 1; } switch ((insn >> 22) & 3) { @@ -2047,7 +2075,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp); break; } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); @@ -2059,31 +2087,31 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) wrd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); switch ((insn >> 22) & 3) { case 1: if (gen_iwmmxt_shift(insn, 0xf, tmp)) { - dead_tmp(tmp); + tcg_temp_free_i32(tmp); return 1; } gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp); break; case 2: if (gen_iwmmxt_shift(insn, 0x1f, tmp)) { - dead_tmp(tmp); + tcg_temp_free_i32(tmp); return 1; } gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp); break; case 3: if (gen_iwmmxt_shift(insn, 0x3f, tmp)) { - dead_tmp(tmp); + tcg_temp_free_i32(tmp); return 1; } gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp); break; } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); @@ -2324,12 +2352,12 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2); break; default: - dead_tmp(tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); return 1; } - dead_tmp(tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; @@ -2340,7 +2368,7 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) return 0; } -/* Disassemble an XScale DSP instruction. Returns nonzero if an error occured +/* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred (ie. an undefined instruction). 
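   These include the MIA, MIAPH and MIAxy multiply-accumulate
   instructions, which operate on the iwMMXt accumulator registers.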
*/ static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn) { @@ -2378,8 +2406,8 @@ static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn) default: return 1; } - dead_tmp(tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(acc); return 0; @@ -2425,7 +2453,7 @@ static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn) if (!env->cp[cp].cp_read) return 1; gen_set_pc_im(s->pc); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tmp2 = tcg_const_i32(insn); gen_helper_get_cp(tmp, cpu_env, tmp2); tcg_temp_free(tmp2); @@ -2438,28 +2466,38 @@ static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn) tmp2 = tcg_const_i32(insn); gen_helper_set_cp(cpu_env, tmp2, tmp); tcg_temp_free(tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } return 0; } -static int cp15_user_ok(uint32_t insn) +static int cp15_user_ok(CPUState *env, uint32_t insn) { int cpn = (insn >> 16) & 0xf; int cpm = insn & 0xf; int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38); + if (arm_feature(env, ARM_FEATURE_V7) && cpn == 9) { + /* Performance monitor registers fall into three categories: + * (a) always UNDEF in usermode + * (b) UNDEF only if PMUSERENR.EN is 0 + * (c) always read OK and UNDEF on write (PMUSERENR only) + */ + if ((cpm == 12 && (op < 6)) || + (cpm == 13 && (op < 3))) { + return env->cp15.c9_pmuserenr; + } else if (cpm == 14 && op == 0 && (insn & ARM_CP_RW_BIT)) { + /* PMUSERENR, read only */ + return 1; + } + return 0; + } + if (cpn == 13 && cpm == 0) { /* TLS register. */ if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT))) return 1; } - if (cpn == 7) { - /* ISB, DSB, DMB. */ - if ((cpm == 5 && op == 4) - || (cpm == 10 && (op == 4 || op == 5))) - return 1; - } return 0; } @@ -2505,7 +2543,7 @@ static int cp15_tls_load_store(CPUState *env, DisasContext *s, uint32_t insn, ui store_cpu_field(tmp, cp15.c13_tls3); break; default: - dead_tmp(tmp); + tcg_temp_free_i32(tmp); return 0; } } @@ -2535,16 +2573,62 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn) /* cdp */ return 1; } - if (IS_USER(s) && !cp15_user_ok(insn)) { - return 1; - } - if ((insn & 0x0fff0fff) == 0x0e070f90 - || (insn & 0x0fff0fff) == 0x0e070f58) { - /* Wait for interrupt. */ - gen_set_pc_im(s->pc); - s->is_jmp = DISAS_WFI; + /* We special case a number of cp15 instructions which were used + * for things which are real instructions in ARMv7. This allows + * them to work in linux-user mode which doesn't provide functional + * get_cp15/set_cp15 helpers, and is more efficient anyway. + */ + switch ((insn & 0x0fff0fff)) { + case 0x0e070f90: + /* 0,c7,c0,4: Standard v6 WFI (also used in some pre-v6 cores). + * In v7, this must NOP. + */ + if (IS_USER(s)) { + return 1; + } + if (!arm_feature(env, ARM_FEATURE_V7)) { + /* Wait for interrupt. */ + gen_set_pc_im(s->pc); + s->is_jmp = DISAS_WFI; + } return 0; + case 0x0e070f58: + /* 0,c7,c8,2: Not all pre-v6 cores implemented this WFI, + * so this is slightly over-broad. + */ + if (!IS_USER(s) && !arm_feature(env, ARM_FEATURE_V6)) { + /* Wait for interrupt. */ + gen_set_pc_im(s->pc); + s->is_jmp = DISAS_WFI; + return 0; + } + /* Otherwise continue to handle via helper function. + * In particular, on v7 and some v6 cores this is one of + * the VA-PA registers. + */ + break; + case 0x0e070f3d: + /* 0,c7,c13,1: prefetch-by-MVA in v6, NOP in v7 */ + if (arm_feature(env, ARM_FEATURE_V6)) { + return IS_USER(s) ? 
1 : 0; + } + break; + case 0x0e070f95: /* 0,c7,c5,4 : ISB */ + case 0x0e070f9a: /* 0,c7,c10,4: DSB */ + case 0x0e070fba: /* 0,c7,c10,5: DMB */ + /* Barriers in both v6 and v7 */ + if (arm_feature(env, ARM_FEATURE_V6)) { + return 0; + } + break; + default: + break; + } + + if (IS_USER(s) && !cp15_user_ok(env, insn)) { + return 1; } + rd = (insn >> 12) & 0xf; if (cp15_tls_load_store(env, s, insn, rd)) @@ -2552,17 +2636,17 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn) tmp2 = tcg_const_i32(insn); if (insn & ARM_CP_RW_BIT) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); gen_helper_get_cp15(tmp, cpu_env, tmp2); /* If the destination register is r15 then sets condition codes. */ if (rd != 15) store_reg(s, rd, tmp); else - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else { tmp = load_reg(s, rd); gen_helper_set_cp15(cpu_env, tmp2, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); /* Normally we would always end the TB here, but Linux * arch/arm/mach-pxa/sleep.S expects two instructions following * an MMU enable to execute from cache. Imitate this behaviour. */ @@ -2597,7 +2681,7 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn) /* Move between integer and VFP cores. */ static TCGv gen_vfp_mrs(void) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_mov_i32(tmp, cpu_F0s); return tmp; } @@ -2605,12 +2689,12 @@ static TCGv gen_vfp_mrs(void) static void gen_vfp_msr(TCGv tmp) { tcg_gen_mov_i32(cpu_F0s, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } static void gen_neon_dup_u8(TCGv var, int shift) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); if (shift) tcg_gen_shri_i32(var, var, shift); tcg_gen_ext8u_i32(var, var); @@ -2618,28 +2702,50 @@ static void gen_neon_dup_u8(TCGv var, int shift) tcg_gen_or_i32(var, var, tmp); tcg_gen_shli_i32(tmp, var, 16); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } static void gen_neon_dup_low16(TCGv var) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_ext16u_i32(var, var); tcg_gen_shli_i32(tmp, var, 16); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } static void gen_neon_dup_high16(TCGv var) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_andi_i32(var, var, 0xffff0000); tcg_gen_shri_i32(tmp, var, 16); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); +} + +static TCGv gen_load_and_replicate(DisasContext *s, TCGv addr, int size) +{ + /* Load a single Neon element and replicate into a 32 bit TCG reg */ + TCGv tmp; + switch (size) { + case 0: + tmp = gen_ld8u(addr, IS_USER(s)); + gen_neon_dup_u8(tmp, 0); + break; + case 1: + tmp = gen_ld16u(addr, IS_USER(s)); + gen_neon_dup_low16(tmp); + break; + case 2: + tmp = gen_ld32(addr, IS_USER(s)); + break; + default: /* Avoid compiler warnings. */ + abort(); + } + return tmp; } -/* Disassemble a VFP instruction. Returns nonzero if an error occured +/* Disassemble a VFP instruction. Returns nonzero if an error occurred (ie. an undefined instruction). 
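   As with the other coprocessor decoders, a nonzero return makes the
   caller raise the undefined-instruction exception.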
*/ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) { @@ -2731,7 +2837,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_neon_dup_low16(tmp); } for (n = 0; n <= pass * 2; n++) { - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_mov_i32(tmp2, tmp); neon_store_reg(rn, n, tmp2); } @@ -2742,12 +2848,12 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) case 0: tmp2 = neon_load_reg(rn, pass); gen_bfi(tmp, tmp2, tmp, offset, 0xff); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); break; case 1: tmp2 = neon_load_reg(rn, pass); gen_bfi(tmp, tmp2, tmp, offset, 0xffff); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); break; case 2: break; @@ -2793,7 +2899,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); tcg_gen_andi_i32(tmp, tmp, 0xf0000000); } else { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); gen_helper_vfp_get_fpscr(tmp, cpu_env); } break; @@ -2814,7 +2920,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) if (rd == 15) { /* Set the 4 flag bits in the CPSR. */ gen_set_nzcv(tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else { store_reg(s, rd, tmp); } @@ -2832,7 +2938,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) break; case ARM_VFP_FPSCR: gen_helper_vfp_set_fpscr(cpu_env, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_lookup_tb(s); break; case ARM_VFP_FPEXC: @@ -2979,27 +3085,34 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) for (;;) { /* Perform the calculation. */ switch (op) { - case 0: /* mac: fd + (fn * fm) */ - gen_vfp_mul(dp); - gen_mov_F1_vreg(dp, rd); + case 0: /* VMLA: fd + (fn * fm) */ + /* Note that order of inputs to the add matters for NaNs */ + gen_vfp_F1_mul(dp); + gen_mov_F0_vreg(dp, rd); gen_vfp_add(dp); break; - case 1: /* nmac: fd - (fn * fm) */ + case 1: /* VMLS: fd + -(fn * fm) */ gen_vfp_mul(dp); - gen_vfp_neg(dp); - gen_mov_F1_vreg(dp, rd); + gen_vfp_F1_neg(dp); + gen_mov_F0_vreg(dp, rd); gen_vfp_add(dp); break; - case 2: /* msc: -fd + (fn * fm) */ - gen_vfp_mul(dp); - gen_mov_F1_vreg(dp, rd); - gen_vfp_sub(dp); + case 2: /* VNMLS: -fd + (fn * fm) */ + /* Note that it isn't valid to replace (-A + B) with (B - A) + * or similar plausible looking simplifications + * because this will give wrong results for NaNs. 
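+                         * When an input is a NaN, the architected result
+                         * depends on which operand it arrives in, so the
+                         * expansion must keep the pseudocode's operand order.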
+ */ + gen_vfp_F1_mul(dp); + gen_mov_F0_vreg(dp, rd); + gen_vfp_neg(dp); + gen_vfp_add(dp); break; - case 3: /* nmsc: -fd - (fn * fm) */ + case 3: /* VNMLA: -fd + -(fn * fm) */ gen_vfp_mul(dp); + gen_vfp_F1_neg(dp); + gen_mov_F0_vreg(dp, rd); gen_vfp_neg(dp); - gen_mov_F1_vreg(dp, rd); - gen_vfp_sub(dp); + gen_vfp_add(dp); break; case 4: /* mul: fn * fm */ gen_vfp_mul(dp); @@ -3059,7 +3172,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp = gen_vfp_mrs(); tcg_gen_ext16u_i32(tmp, tmp); gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; case 5: /* vcvtt.f32.f16 */ if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) @@ -3067,31 +3180,31 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp = gen_vfp_mrs(); tcg_gen_shri_i32(tmp, tmp, 16); gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; case 6: /* vcvtb.f16.f32 */ if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) return 1; - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); gen_mov_F0_vreg(0, rd); tmp2 = gen_vfp_mrs(); tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); tcg_gen_or_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); gen_vfp_msr(tmp); break; case 7: /* vcvtt.f16.f32 */ if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) return 1; - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); tcg_gen_shli_i32(tmp, tmp, 16); gen_mov_F0_vreg(0, rd); tmp2 = gen_vfp_mrs(); tcg_gen_ext16u_i32(tmp2, tmp2); tcg_gen_or_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); gen_vfp_msr(tmp); break; case 8: /* cmp */ @@ -3114,62 +3227,62 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env); break; case 16: /* fuito */ - gen_vfp_uito(dp); + gen_vfp_uito(dp, 0); break; case 17: /* fsito */ - gen_vfp_sito(dp); + gen_vfp_sito(dp, 0); break; case 20: /* fshto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_shto(dp, 16 - rm); + gen_vfp_shto(dp, 16 - rm, 0); break; case 21: /* fslto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_slto(dp, 32 - rm); + gen_vfp_slto(dp, 32 - rm, 0); break; case 22: /* fuhto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_uhto(dp, 16 - rm); + gen_vfp_uhto(dp, 16 - rm, 0); break; case 23: /* fulto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_ulto(dp, 32 - rm); + gen_vfp_ulto(dp, 32 - rm, 0); break; case 24: /* ftoui */ - gen_vfp_toui(dp); + gen_vfp_toui(dp, 0); break; case 25: /* ftouiz */ - gen_vfp_touiz(dp); + gen_vfp_touiz(dp, 0); break; case 26: /* ftosi */ - gen_vfp_tosi(dp); + gen_vfp_tosi(dp, 0); break; case 27: /* ftosiz */ - gen_vfp_tosiz(dp); + gen_vfp_tosiz(dp, 0); break; case 28: /* ftosh */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_tosh(dp, 16 - rm); + gen_vfp_tosh(dp, 16 - rm, 0); break; case 29: /* ftosl */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_tosl(dp, 32 - rm); + gen_vfp_tosl(dp, 32 - rm, 0); break; case 30: /* ftouh */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_touh(dp, 16 - rm); + gen_vfp_touh(dp, 16 - rm, 0); break; case 31: /* ftoul */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_toul(dp, 32 - rm); + gen_vfp_toul(dp, 32 - rm, 0); break; default: /* undefined */ printf ("rn:%d\n", rn); @@ -3232,7 +3345,7 @@ static int 
disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) break; case 0xc: case 0xd: - if (dp && (insn & 0x03e00000) == 0x00400000) { + if ((insn & 0x03e00000) == 0x00400000) { /* two-register transfer */ rn = (insn >> 16) & 0xf; rd = (insn >> 12) & 0xf; @@ -3254,10 +3367,10 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) } else { gen_mov_F0_vreg(0, rm); tmp = gen_vfp_mrs(); - store_reg(s, rn, tmp); + store_reg(s, rd, tmp); gen_mov_F0_vreg(0, rm + 1); tmp = gen_vfp_mrs(); - store_reg(s, rd, tmp); + store_reg(s, rn, tmp); } } else { /* arm->vfp */ @@ -3269,10 +3382,10 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_vfp_msr(tmp); gen_mov_vreg_F0(0, rm * 2 + 1); } else { - tmp = load_reg(s, rn); + tmp = load_reg(s, rd); gen_vfp_msr(tmp); gen_mov_vreg_F0(0, rm); - tmp = load_reg(s, rd); + tmp = load_reg(s, rn); gen_vfp_msr(tmp); gen_mov_vreg_F0(0, rm + 1); } @@ -3285,7 +3398,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) else rd = VFP_SREG_D(insn); if (s->thumb && rn == 15) { - addr = new_tmp(); + addr = tcg_temp_new_i32(); tcg_gen_movi_i32(addr, s->pc & ~2); } else { addr = load_reg(s, rn); @@ -3303,7 +3416,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_mov_F0_vreg(dp, rd); gen_vfp_st(s, dp, addr); } - dead_tmp(addr); + tcg_temp_free_i32(addr); } else { /* load/store multiple */ if (dp) @@ -3343,7 +3456,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_addi_i32(addr, addr, offset); store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } } } @@ -3363,7 +3476,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest) if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) { tcg_gen_goto_tb(n); gen_set_pc_im(dest); - tcg_gen_exit_tb((long)tb + n); + tcg_gen_exit_tb((tcg_target_long)tb + n); } else { gen_set_pc_im(dest); tcg_gen_exit_tb(0); @@ -3412,6 +3525,10 @@ static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) { /* Mask out undefined bits. 
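   Bits that do not exist in the emulated architecture version must not
   be settable via MSR, so they are cleared from the mask below.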
*/ mask &= ~CPSR_RESERVED; + if (!arm_feature(env, ARM_FEATURE_V4T)) + mask &= ~CPSR_T; + if (!arm_feature(env, ARM_FEATURE_V5)) + mask &= ~CPSR_Q; /* V5TE in reality*/ if (!arm_feature(env, ARM_FEATURE_V6)) mask &= ~(CPSR_E | CPSR_GE); if (!arm_feature(env, ARM_FEATURE_THUMB2)) @@ -3442,7 +3559,7 @@ static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv t0) } else { gen_set_cpsr(t0, mask); } - dead_tmp(t0); + tcg_temp_free_i32(t0); gen_lookup_tb(s); return 0; } @@ -3451,7 +3568,7 @@ static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv t0) static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val) { TCGv tmp; - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, val); return gen_set_psr(s, mask, spsr, tmp); } @@ -3463,7 +3580,7 @@ static void gen_exception_return(DisasContext *s, TCGv pc) store_reg(s, 15, pc); tmp = load_cpu_field(spsr); gen_set_cpsr(tmp, 0xffffffff); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); s->is_jmp = DISAS_UPDATE; } @@ -3471,7 +3588,7 @@ static void gen_exception_return(DisasContext *s, TCGv pc) static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr) { gen_set_cpsr(cpsr, 0xffffffff); - dead_tmp(cpsr); + tcg_temp_free_i32(cpsr); store_reg(s, 15, pc); s->is_jmp = DISAS_UPDATE; } @@ -3481,7 +3598,7 @@ gen_set_condexec (DisasContext *s) { if (s->condexec_mask) { uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1); - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, val); store_cpu_field(tmp, condexec_bits); } @@ -3512,15 +3629,14 @@ static void gen_nop_hint(DisasContext *s, int val) #define CPU_V001 cpu_V0, cpu_V0, cpu_V1 -static inline int gen_neon_add(int size, TCGv t0, TCGv t1) +static inline void gen_neon_add(int size, TCGv t0, TCGv t1) { switch (size) { case 0: gen_helper_neon_add_u8(t0, t0, t1); break; case 1: gen_helper_neon_add_u16(t0, t0, t1); break; case 2: tcg_gen_add_i32(t0, t0, t1); break; - default: return 1; + default: abort(); } - return 0; } static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1) @@ -3539,12 +3655,6 @@ static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1) #define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32 #define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32 -/* FIXME: This is wrong. They set the wrong overflow bit. 
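-   (These mapped the Neon 32-bit saturating ops onto the core-register
-   saturation helpers, which set the CPSR Q flag rather than the
-   FPSCR.QC bit that Neon saturation is defined to set.)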
*/ -#define gen_helper_neon_qadd_s32(a, e, b, c) gen_helper_add_saturate(a, b, c) -#define gen_helper_neon_qadd_u32(a, e, b, c) gen_helper_add_usaturate(a, b, c) -#define gen_helper_neon_qsub_s32(a, e, b, c) gen_helper_sub_saturate(a, b, c) -#define gen_helper_neon_qsub_u32(a, e, b, c) gen_helper_sub_usaturate(a, b, c) - #define GEN_NEON_INTEGER_OP_ENV(name) do { \ switch ((size << 1) | u) { \ case 0: \ @@ -3593,7 +3703,7 @@ static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1) static TCGv neon_load_scratch(int scratch) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); return tmp; } @@ -3601,7 +3711,7 @@ static TCGv neon_load_scratch(int scratch) static void neon_store_scratch(int scratch, TCGv var) { tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); - dead_tmp(var); + tcg_temp_free_i32(var); } static inline TCGv neon_get_scalar(int size, int reg) @@ -3620,122 +3730,90 @@ static inline TCGv neon_get_scalar(int size, int reg) return tmp; } -static void gen_neon_unzip_u8(TCGv t0, TCGv t1) -{ - TCGv rd, rm, tmp; - - rd = new_tmp(); - rm = new_tmp(); - tmp = new_tmp(); - - tcg_gen_andi_i32(rd, t0, 0xff); - tcg_gen_shri_i32(tmp, t0, 8); - tcg_gen_andi_i32(tmp, tmp, 0xff00); - tcg_gen_or_i32(rd, rd, tmp); - tcg_gen_shli_i32(tmp, t1, 16); - tcg_gen_andi_i32(tmp, tmp, 0xff0000); - tcg_gen_or_i32(rd, rd, tmp); - tcg_gen_shli_i32(tmp, t1, 8); - tcg_gen_andi_i32(tmp, tmp, 0xff000000); - tcg_gen_or_i32(rd, rd, tmp); - - tcg_gen_shri_i32(rm, t0, 8); - tcg_gen_andi_i32(rm, rm, 0xff); - tcg_gen_shri_i32(tmp, t0, 16); - tcg_gen_andi_i32(tmp, tmp, 0xff00); - tcg_gen_or_i32(rm, rm, tmp); - tcg_gen_shli_i32(tmp, t1, 8); - tcg_gen_andi_i32(tmp, tmp, 0xff0000); - tcg_gen_or_i32(rm, rm, tmp); - tcg_gen_andi_i32(tmp, t1, 0xff000000); - tcg_gen_or_i32(t1, rm, tmp); - tcg_gen_mov_i32(t0, rd); - - dead_tmp(tmp); - dead_tmp(rm); - dead_tmp(rd); -} - -static void gen_neon_zip_u8(TCGv t0, TCGv t1) -{ - TCGv rd, rm, tmp; - - rd = new_tmp(); - rm = new_tmp(); - tmp = new_tmp(); - - tcg_gen_andi_i32(rd, t0, 0xff); - tcg_gen_shli_i32(tmp, t1, 8); - tcg_gen_andi_i32(tmp, tmp, 0xff00); - tcg_gen_or_i32(rd, rd, tmp); - tcg_gen_shli_i32(tmp, t0, 16); - tcg_gen_andi_i32(tmp, tmp, 0xff0000); - tcg_gen_or_i32(rd, rd, tmp); - tcg_gen_shli_i32(tmp, t1, 24); - tcg_gen_andi_i32(tmp, tmp, 0xff000000); - tcg_gen_or_i32(rd, rd, tmp); - - tcg_gen_andi_i32(rm, t1, 0xff000000); - tcg_gen_shri_i32(tmp, t0, 8); - tcg_gen_andi_i32(tmp, tmp, 0xff0000); - tcg_gen_or_i32(rm, rm, tmp); - tcg_gen_shri_i32(tmp, t1, 8); - tcg_gen_andi_i32(tmp, tmp, 0xff00); - tcg_gen_or_i32(rm, rm, tmp); - tcg_gen_shri_i32(tmp, t0, 16); - tcg_gen_andi_i32(tmp, tmp, 0xff); - tcg_gen_or_i32(t1, rm, tmp); - tcg_gen_mov_i32(t0, rd); - - dead_tmp(tmp); - dead_tmp(rm); - dead_tmp(rd); -} - -static void gen_neon_zip_u16(TCGv t0, TCGv t1) +static int gen_neon_unzip(int rd, int rm, int size, int q) { TCGv tmp, tmp2; - - tmp = new_tmp(); - tmp2 = new_tmp(); - - tcg_gen_andi_i32(tmp, t0, 0xffff); - tcg_gen_shli_i32(tmp2, t1, 16); - tcg_gen_or_i32(tmp, tmp, tmp2); - tcg_gen_andi_i32(t1, t1, 0xffff0000); - tcg_gen_shri_i32(tmp2, t0, 16); - tcg_gen_or_i32(t1, t1, tmp2); - tcg_gen_mov_i32(t0, tmp); - - dead_tmp(tmp2); - dead_tmp(tmp); + if (!q && size == 2) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rm); + if (q) { + switch (size) { + case 0: + gen_helper_neon_qunzip8(cpu_env, tmp, tmp2); + break; + case 1: + gen_helper_neon_qunzip16(cpu_env, 
tmp, tmp2); + break; + case 2: + gen_helper_neon_qunzip32(cpu_env, tmp, tmp2); + break; + default: + abort(); + } + } else { + switch (size) { + case 0: + gen_helper_neon_unzip8(cpu_env, tmp, tmp2); + break; + case 1: + gen_helper_neon_unzip16(cpu_env, tmp, tmp2); + break; + default: + abort(); + } + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + return 0; } -static void gen_neon_unzip(int reg, int q, int tmp, int size) +static int gen_neon_zip(int rd, int rm, int size, int q) { - int n; - TCGv t0, t1; - - for (n = 0; n < q + 1; n += 2) { - t0 = neon_load_reg(reg, n); - t1 = neon_load_reg(reg, n + 1); + TCGv tmp, tmp2; + if (!q && size == 2) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rm); + if (q) { switch (size) { - case 0: gen_neon_unzip_u8(t0, t1); break; - case 1: gen_neon_zip_u16(t0, t1); break; /* zip and unzip are the same. */ - case 2: /* no-op */; break; - default: abort(); + case 0: + gen_helper_neon_qzip8(cpu_env, tmp, tmp2); + break; + case 1: + gen_helper_neon_qzip16(cpu_env, tmp, tmp2); + break; + case 2: + gen_helper_neon_qzip32(cpu_env, tmp, tmp2); + break; + default: + abort(); + } + } else { + switch (size) { + case 0: + gen_helper_neon_zip8(cpu_env, tmp, tmp2); + break; + case 1: + gen_helper_neon_zip16(cpu_env, tmp, tmp2); + break; + default: + abort(); } - neon_store_scratch(tmp + n, t0); - neon_store_scratch(tmp + n + 1, t1); } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + return 0; } static void gen_neon_trn_u8(TCGv t0, TCGv t1) { TCGv rd, tmp; - rd = new_tmp(); - tmp = new_tmp(); + rd = tcg_temp_new_i32(); + tmp = tcg_temp_new_i32(); tcg_gen_shli_i32(rd, t0, 8); tcg_gen_andi_i32(rd, rd, 0xff00ff00); @@ -3748,16 +3826,16 @@ static void gen_neon_trn_u8(TCGv t0, TCGv t1) tcg_gen_or_i32(t1, t1, tmp); tcg_gen_mov_i32(t0, rd); - dead_tmp(tmp); - dead_tmp(rd); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(rd); } static void gen_neon_trn_u16(TCGv t0, TCGv t1) { TCGv rd, tmp; - rd = new_tmp(); - tmp = new_tmp(); + rd = tcg_temp_new_i32(); + tmp = tcg_temp_new_i32(); tcg_gen_shli_i32(rd, t0, 16); tcg_gen_andi_i32(tmp, t1, 0xffff); @@ -3767,8 +3845,8 @@ static void gen_neon_trn_u16(TCGv t0, TCGv t1) tcg_gen_or_i32(t1, t1, tmp); tcg_gen_mov_i32(t0, rd); - dead_tmp(tmp); - dead_tmp(rd); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(rd); } @@ -3817,18 +3895,33 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) rn = (insn >> 16) & 0xf; rm = insn & 0xf; load = (insn & (1 << 21)) != 0; - addr = new_tmp(); if ((insn & (1 << 23)) == 0) { /* Load store all elements. 
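   This is the VLDn/VSTn "multiple structures" form, which transfers
   whole registers with the interleave pattern given by the type field.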
*/ op = (insn >> 8) & 0xf; size = (insn >> 6) & 3; if (op > 10) return 1; + /* Catch UNDEF cases for bad values of align field */ + switch (op & 0xc) { + case 4: + if (((insn >> 5) & 1) == 1) { + return 1; + } + break; + case 8: + if (((insn >> 4) & 3) == 3) { + return 1; + } + break; + default: + break; + } nregs = neon_ls_element_type[op].nregs; interleave = neon_ls_element_type[op].interleave; spacing = neon_ls_element_type[op].spacing; if (size == 3 && (interleave | spacing) != 1) return 1; + addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); stride = (1 << size) * interleave; for (reg = 0; reg < nregs; reg++) { @@ -3869,11 +3962,11 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_addi_i32(addr, addr, stride); tcg_gen_shli_i32(tmp2, tmp2, 16); tcg_gen_or_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); neon_store_reg(rd, pass, tmp); } else { tmp = neon_load_reg(rd, pass); - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_shri_i32(tmp2, tmp, 16); gen_st16(tmp, addr, IS_USER(s)); tcg_gen_addi_i32(addr, addr, stride); @@ -3891,14 +3984,14 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) } else { tcg_gen_shli_i32(tmp, tmp, n * 8); tcg_gen_or_i32(tmp2, tmp2, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } } neon_store_reg(rd, pass, tmp2); } else { tmp2 = neon_load_reg(rd, pass); for (n = 0; n < 4; n++) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); if (n == 0) { tcg_gen_mov_i32(tmp, tmp2); } else { @@ -3907,52 +4000,68 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_st8(tmp, addr, IS_USER(s)); tcg_gen_addi_i32(addr, addr, stride); } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } } } } rd += spacing; } + tcg_temp_free_i32(addr); stride = nregs * 8; } else { size = (insn >> 10) & 3; if (size == 3) { /* Load single element to all lanes. */ - if (!load) + int a = (insn >> 4) & 1; + if (!load) { return 1; + } size = (insn >> 6) & 3; nregs = ((insn >> 8) & 3) + 1; - stride = (insn & (1 << 5)) ? 2 : 1; - load_reg_var(s, addr, rn); - for (reg = 0; reg < nregs; reg++) { - switch (size) { - case 0: - tmp = gen_ld8u(addr, IS_USER(s)); - gen_neon_dup_u8(tmp, 0); - break; - case 1: - tmp = gen_ld16u(addr, IS_USER(s)); - gen_neon_dup_low16(tmp); - break; - case 2: - tmp = gen_ld32(addr, IS_USER(s)); - break; - case 3: + + if (size == 3) { + if (nregs != 4 || a == 0) { return 1; - default: /* Avoid compiler warnings. */ - abort(); } - tcg_gen_addi_i32(addr, addr, 1 << size); - tmp2 = new_tmp(); - tcg_gen_mov_i32(tmp2, tmp); - neon_store_reg(rd, 0, tmp2); - neon_store_reg(rd, 1, tmp); - rd += stride; + /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */ + size = 2; + } + if (nregs == 1 && a == 1 && size == 0) { + return 1; + } + if (nregs == 3 && a == 1) { + return 1; } + addr = tcg_temp_new_i32(); + load_reg_var(s, addr, rn); + if (nregs == 1) { + /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */ + tmp = gen_load_and_replicate(s, addr, size); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); + if (insn & (1 << 5)) { + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1)); + } + tcg_temp_free_i32(tmp); + } else { + /* VLD2/3/4 to all lanes: bit 5 indicates register stride */ + stride = (insn & (1 << 5)) ? 
2 : 1; + for (reg = 0; reg < nregs; reg++) { + tmp = gen_load_and_replicate(s, addr, size); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); + tcg_temp_free_i32(tmp); + tcg_gen_addi_i32(addr, addr, 1 << size); + rd += stride; + } + } + tcg_temp_free_i32(addr); stride = (1 << size) * nregs; } else { /* Single element. */ + int idx = (insn >> 4) & 0xf; pass = (insn >> 7) & 1; switch (size) { case 0: @@ -3971,6 +4080,40 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) abort(); } nregs = ((insn >> 8) & 3) + 1; + /* Catch the UNDEF cases. This is unavoidably a bit messy. */ + switch (nregs) { + case 1: + if (((idx & (1 << size)) != 0) || + (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) { + return 1; + } + break; + case 3: + if ((idx & 1) != 0) { + return 1; + } + /* fall through */ + case 2: + if (size == 2 && (idx & 2) != 0) { + return 1; + } + break; + case 4: + if ((size == 2) && ((idx & 3) == 3)) { + return 1; + } + break; + default: + abort(); + } + if ((rd + stride * (nregs - 1)) > 31) { + /* Attempts to write off the end of the register file + * are UNPREDICTABLE; we choose to UNDEF because otherwise + * the neon_load_reg() would write off the end of the array. + */ + return 1; + } + addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); for (reg = 0; reg < nregs; reg++) { if (load) { @@ -3990,7 +4133,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) if (size != 2) { tmp2 = neon_load_reg(rd, pass); gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } neon_store_reg(rd, pass, tmp); } else { /* Store */ @@ -4012,10 +4155,10 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) rd += stride; tcg_gen_addi_i32(addr, addr, 1 << size); } + tcg_temp_free_i32(addr); stride = nregs * (1 << size); } } - dead_tmp(addr); if (rm != 15) { TCGv base; @@ -4026,7 +4169,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) TCGv index; index = load_reg(s, rm); tcg_gen_add_i32(base, base, index); - dead_tmp(index); + tcg_temp_free_i32(index); } store_reg(s, rn, base); } @@ -4071,6 +4214,16 @@ static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src) } } +static inline void gen_neon_unarrow_sats(int size, TCGv dest, TCGv_i64 src) +{ + switch (size) { + case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break; + case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break; + case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break; + default: abort(); + } +} + static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift, int q, int u) { @@ -4091,8 +4244,8 @@ static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift, } else { if (u) { switch (size) { - case 1: gen_helper_neon_rshl_u16(var, var, shift); break; - case 2: gen_helper_neon_rshl_u32(var, var, shift); break; + case 1: gen_helper_neon_shl_u16(var, var, shift); break; + case 2: gen_helper_neon_shl_u32(var, var, shift); break; default: abort(); } } else { @@ -4122,7 +4275,7 @@ static inline void gen_neon_widen(TCGv_i64 dest, TCGv src, int size, int u) default: abort(); } } - dead_tmp(src); + tcg_temp_free_i32(src); } static inline void gen_neon_addl(int size) @@ -4176,10 +4329,12 @@ static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u) case 4: tmp = gen_muls_i64_i32(a, b); tcg_gen_mov_i64(dest, tmp); + tcg_temp_free_i64(tmp); break; case 
5: tmp = gen_mulu_i64_i32(a, b); tcg_gen_mov_i64(dest, tmp); + tcg_temp_free_i64(tmp); break; default: abort(); } @@ -4187,11 +4342,203 @@ static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u) /* gen_helper_neon_mull_[su]{8|16} do not free their parameters. Don't forget to clean them now. */ if (size < 2) { - dead_tmp(a); - dead_tmp(b); + tcg_temp_free_i32(a); + tcg_temp_free_i32(b); + } +} + +static void gen_neon_narrow_op(int op, int u, int size, TCGv dest, TCGv_i64 src) +{ + if (op) { + if (u) { + gen_neon_unarrow_sats(size, dest, src); + } else { + gen_neon_narrow(size, dest, src); + } + } else { + if (u) { + gen_neon_narrow_satu(size, dest, src); + } else { + gen_neon_narrow_sats(size, dest, src); + } } } +/* Symbolic constants for op fields for Neon 3-register same-length. + * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B + * table A7-9. + */ +#define NEON_3R_VHADD 0 +#define NEON_3R_VQADD 1 +#define NEON_3R_VRHADD 2 +#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */ +#define NEON_3R_VHSUB 4 +#define NEON_3R_VQSUB 5 +#define NEON_3R_VCGT 6 +#define NEON_3R_VCGE 7 +#define NEON_3R_VSHL 8 +#define NEON_3R_VQSHL 9 +#define NEON_3R_VRSHL 10 +#define NEON_3R_VQRSHL 11 +#define NEON_3R_VMAX 12 +#define NEON_3R_VMIN 13 +#define NEON_3R_VABD 14 +#define NEON_3R_VABA 15 +#define NEON_3R_VADD_VSUB 16 +#define NEON_3R_VTST_VCEQ 17 +#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */ +#define NEON_3R_VMUL 19 +#define NEON_3R_VPMAX 20 +#define NEON_3R_VPMIN 21 +#define NEON_3R_VQDMULH_VQRDMULH 22 +#define NEON_3R_VPADD 23 +#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */ +#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */ +#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */ +#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */ +#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */ +#define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS */ + +static const uint8_t neon_3r_sizes[] = { + [NEON_3R_VHADD] = 0x7, + [NEON_3R_VQADD] = 0xf, + [NEON_3R_VRHADD] = 0x7, + [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */ + [NEON_3R_VHSUB] = 0x7, + [NEON_3R_VQSUB] = 0xf, + [NEON_3R_VCGT] = 0x7, + [NEON_3R_VCGE] = 0x7, + [NEON_3R_VSHL] = 0xf, + [NEON_3R_VQSHL] = 0xf, + [NEON_3R_VRSHL] = 0xf, + [NEON_3R_VQRSHL] = 0xf, + [NEON_3R_VMAX] = 0x7, + [NEON_3R_VMIN] = 0x7, + [NEON_3R_VABD] = 0x7, + [NEON_3R_VABA] = 0x7, + [NEON_3R_VADD_VSUB] = 0xf, + [NEON_3R_VTST_VCEQ] = 0x7, + [NEON_3R_VML] = 0x7, + [NEON_3R_VMUL] = 0x7, + [NEON_3R_VPMAX] = 0x7, + [NEON_3R_VPMIN] = 0x7, + [NEON_3R_VQDMULH_VQRDMULH] = 0x6, + [NEON_3R_VPADD] = 0x7, + [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_VRECPS_VRSQRTS] = 0x5, /* size bit 1 encodes op */ +}; + +/* Symbolic constants for op fields for Neon 2-register miscellaneous. + * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B + * table A7-13. 
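+ * Gaps in the numbering correspond to unallocated encodings.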
+ */ +#define NEON_2RM_VREV64 0 +#define NEON_2RM_VREV32 1 +#define NEON_2RM_VREV16 2 +#define NEON_2RM_VPADDL 4 +#define NEON_2RM_VPADDL_U 5 +#define NEON_2RM_VCLS 8 +#define NEON_2RM_VCLZ 9 +#define NEON_2RM_VCNT 10 +#define NEON_2RM_VMVN 11 +#define NEON_2RM_VPADAL 12 +#define NEON_2RM_VPADAL_U 13 +#define NEON_2RM_VQABS 14 +#define NEON_2RM_VQNEG 15 +#define NEON_2RM_VCGT0 16 +#define NEON_2RM_VCGE0 17 +#define NEON_2RM_VCEQ0 18 +#define NEON_2RM_VCLE0 19 +#define NEON_2RM_VCLT0 20 +#define NEON_2RM_VABS 22 +#define NEON_2RM_VNEG 23 +#define NEON_2RM_VCGT0_F 24 +#define NEON_2RM_VCGE0_F 25 +#define NEON_2RM_VCEQ0_F 26 +#define NEON_2RM_VCLE0_F 27 +#define NEON_2RM_VCLT0_F 28 +#define NEON_2RM_VABS_F 30 +#define NEON_2RM_VNEG_F 31 +#define NEON_2RM_VSWP 32 +#define NEON_2RM_VTRN 33 +#define NEON_2RM_VUZP 34 +#define NEON_2RM_VZIP 35 +#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */ +#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */ +#define NEON_2RM_VSHLL 38 +#define NEON_2RM_VCVT_F16_F32 44 +#define NEON_2RM_VCVT_F32_F16 46 +#define NEON_2RM_VRECPE 56 +#define NEON_2RM_VRSQRTE 57 +#define NEON_2RM_VRECPE_F 58 +#define NEON_2RM_VRSQRTE_F 59 +#define NEON_2RM_VCVT_FS 60 +#define NEON_2RM_VCVT_FU 61 +#define NEON_2RM_VCVT_SF 62 +#define NEON_2RM_VCVT_UF 63 + +static int neon_2rm_is_float_op(int op) +{ + /* Return true if this neon 2reg-misc op is float-to-float */ + return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F || + op >= NEON_2RM_VRECPE_F); +} + +/* Each entry in this array has bit n set if the insn allows + * size value n (otherwise it will UNDEF). Since unallocated + * op values will have no bits set they always UNDEF. + */ +static const uint8_t neon_2rm_sizes[] = { + [NEON_2RM_VREV64] = 0x7, + [NEON_2RM_VREV32] = 0x3, + [NEON_2RM_VREV16] = 0x1, + [NEON_2RM_VPADDL] = 0x7, + [NEON_2RM_VPADDL_U] = 0x7, + [NEON_2RM_VCLS] = 0x7, + [NEON_2RM_VCLZ] = 0x7, + [NEON_2RM_VCNT] = 0x1, + [NEON_2RM_VMVN] = 0x1, + [NEON_2RM_VPADAL] = 0x7, + [NEON_2RM_VPADAL_U] = 0x7, + [NEON_2RM_VQABS] = 0x7, + [NEON_2RM_VQNEG] = 0x7, + [NEON_2RM_VCGT0] = 0x7, + [NEON_2RM_VCGE0] = 0x7, + [NEON_2RM_VCEQ0] = 0x7, + [NEON_2RM_VCLE0] = 0x7, + [NEON_2RM_VCLT0] = 0x7, + [NEON_2RM_VABS] = 0x7, + [NEON_2RM_VNEG] = 0x7, + [NEON_2RM_VCGT0_F] = 0x4, + [NEON_2RM_VCGE0_F] = 0x4, + [NEON_2RM_VCEQ0_F] = 0x4, + [NEON_2RM_VCLE0_F] = 0x4, + [NEON_2RM_VCLT0_F] = 0x4, + [NEON_2RM_VABS_F] = 0x4, + [NEON_2RM_VNEG_F] = 0x4, + [NEON_2RM_VSWP] = 0x1, + [NEON_2RM_VTRN] = 0x7, + [NEON_2RM_VUZP] = 0x7, + [NEON_2RM_VZIP] = 0x7, + [NEON_2RM_VMOVN] = 0x7, + [NEON_2RM_VQMOVN] = 0x7, + [NEON_2RM_VSHLL] = 0x7, + [NEON_2RM_VCVT_F16_F32] = 0x2, + [NEON_2RM_VCVT_F32_F16] = 0x2, + [NEON_2RM_VRECPE] = 0x4, + [NEON_2RM_VRSQRTE] = 0x4, + [NEON_2RM_VRECPE_F] = 0x4, + [NEON_2RM_VRSQRTE_F] = 0x4, + [NEON_2RM_VCVT_FS] = 0x4, + [NEON_2RM_VCVT_FU] = 0x4, + [NEON_2RM_VCVT_SF] = 0x4, + [NEON_2RM_VCVT_UF] = 0x4, +}; + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -4208,7 +4555,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) int count; int pairwise; int u; - int n; uint32_t imm, mask; TCGv tmp, tmp2, tmp3, tmp4, tmp5; TCGv_i64 tmp64; @@ -4224,35 +4570,48 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if ((insn & (1 << 23)) == 0) { /* Three register same length. 
*/ op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); - if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9 - || op == 10 || op == 11 || op == 16)) { - /* 64-bit element instructions. */ + /* Catch invalid op and bad size combinations: UNDEF */ + if ((neon_3r_sizes[op] & (1 << size)) == 0) { + return 1; + } + /* All insns of this form UNDEF for either this condition or the + * superset of cases "Q==1"; we catch the latter later. + */ + if (q && ((rd | rn | rm) & 1)) { + return 1; + } + if (size == 3 && op != NEON_3R_LOGIC) { + /* 64-bit element instructions. */ for (pass = 0; pass < (q ? 2 : 1); pass++) { neon_load_reg64(cpu_V0, rn + pass); neon_load_reg64(cpu_V1, rm + pass); switch (op) { - case 1: /* VQADD */ + case NEON_3R_VQADD: if (u) { - gen_helper_neon_add_saturate_u64(CPU_V001); + gen_helper_neon_qadd_u64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } else { - gen_helper_neon_add_saturate_s64(CPU_V001); + gen_helper_neon_qadd_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } break; - case 5: /* VQSUB */ + case NEON_3R_VQSUB: if (u) { - gen_helper_neon_sub_saturate_u64(CPU_V001); + gen_helper_neon_qsub_u64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } else { - gen_helper_neon_sub_saturate_s64(CPU_V001); + gen_helper_neon_qsub_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } break; - case 8: /* VSHL */ + case NEON_3R_VSHL: if (u) { gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0); } else { gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0); } break; - case 9: /* VQSHL */ + case NEON_3R_VQSHL: if (u) { gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V1, cpu_V0); @@ -4261,14 +4620,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) cpu_V1, cpu_V0); } break; - case 10: /* VRSHL */ + case NEON_3R_VRSHL: if (u) { gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0); } else { gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0); } break; - case 11: /* VQRSHL */ + case NEON_3R_VQRSHL: if (u) { gen_helper_neon_qrshl_u64(cpu_V0, cpu_env, cpu_V1, cpu_V0); @@ -4277,7 +4636,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) cpu_V1, cpu_V0); } break; - case 16: + case NEON_3R_VADD_VSUB: if (u) { tcg_gen_sub_i64(CPU_V001); } else { @@ -4291,50 +4650,76 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } return 0; } + pairwise = 0; switch (op) { - case 8: /* VSHL */ - case 9: /* VQSHL */ - case 10: /* VRSHL */ - case 11: /* VQRSHL */ + case NEON_3R_VSHL: + case NEON_3R_VQSHL: + case NEON_3R_VRSHL: + case NEON_3R_VQRSHL: { int rtmp; /* Shift instruction operands are reversed. 
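/* Illustrative sketch of the saturation semantics behind the
 * qadd_s64 helper call above; the real helper sets the sticky QC
 * flag through cpu_env, for which the 'qc' out-parameter is an
 * assumed stand-in.
 */
#include <stdint.h>

static int64_t sat_add_s64(int64_t a, int64_t b, int *qc)
{
    int64_t r = (int64_t)((uint64_t)a + (uint64_t)b);
    /* Signed overflow iff a and b share the sign that the result lost. */
    if (((a ^ r) & (b ^ r)) < 0) {
        *qc = 1;
        r = (a < 0) ? INT64_MIN : INT64_MAX;
    }
    return r;
}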
*/ rtmp = rn; rn = rm; rm = rtmp; - pairwise = 0; } break; - case 20: /* VPMAX */ - case 21: /* VPMIN */ - case 23: /* VPADD */ + case NEON_3R_VPADD: + if (u) { + return 1; + } + /* Fall through */ + case NEON_3R_VPMAX: + case NEON_3R_VPMIN: pairwise = 1; break; - case 26: /* VPADD (float) */ - pairwise = (u && size < 2); + case NEON_3R_FLOAT_ARITH: + pairwise = (u && size < 2); /* if VPADD (float) */ break; - case 30: /* VPMIN/VPMAX (float) */ - pairwise = u; + case NEON_3R_FLOAT_MINMAX: + pairwise = u; /* if VPMIN/VPMAX (float) */ + break; + case NEON_3R_FLOAT_CMP: + if (!u && size) { + /* no encoding for U=0 C=1x */ + return 1; + } + break; + case NEON_3R_FLOAT_ACMP: + if (!u) { + return 1; + } + break; + case NEON_3R_VRECPS_VRSQRTS: + if (u) { + return 1; + } + break; + case NEON_3R_VMUL: + if (u && (size != 0)) { + /* UNDEF on invalid size for polynomial subcase */ + return 1; + } break; default: - pairwise = 0; break; } + if (pairwise && q) { + /* All the pairwise insns UNDEF if Q is set */ + return 1; + } + for (pass = 0; pass < (q ? 4 : 2); pass++) { if (pairwise) { /* Pairwise. */ - if (q) - n = (pass & 1) * 2; - else - n = 0; - if (pass < q + 1) { - tmp = neon_load_reg(rn, n); - tmp2 = neon_load_reg(rn, n + 1); + if (pass < 1) { + tmp = neon_load_reg(rn, 0); + tmp2 = neon_load_reg(rn, 1); } else { - tmp = neon_load_reg(rm, n); - tmp2 = neon_load_reg(rm, n + 1); + tmp = neon_load_reg(rm, 0); + tmp2 = neon_load_reg(rm, 1); } } else { /* Elementwise. */ @@ -4342,16 +4727,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp2 = neon_load_reg(rm, pass); } switch (op) { - case 0: /* VHADD */ + case NEON_3R_VHADD: GEN_NEON_INTEGER_OP(hadd); break; - case 1: /* VQADD */ + case NEON_3R_VQADD: GEN_NEON_INTEGER_OP_ENV(qadd); break; - case 2: /* VRHADD */ + case NEON_3R_VRHADD: GEN_NEON_INTEGER_OP(rhadd); break; - case 3: /* Logic ops. */ + case NEON_3R_LOGIC: /* Logic ops. 
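/* Illustrative sketch of the operand pattern the 'pairwise' flag
 * selects above, shown for VPADD.I32 on D registers: the low result
 * element comes from the adjacent pair in Rn (pass 0), the high one
 * from the pair in Rm (pass 1).
 */
#include <stdint.h>

static void vpadd_i32(uint32_t d[2], const uint32_t n[2], const uint32_t m[2])
{
    d[0] = n[0] + n[1];
    d[1] = m[0] + m[1];
}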
*/ switch ((u << 2) | size) { case 0: /* VAND */ tcg_gen_and_i32(tmp, tmp, tmp2); @@ -4371,97 +4756,96 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 5: /* VBSL */ tmp3 = neon_load_reg(rd, pass); gen_neon_bsl(tmp, tmp, tmp2, tmp3); - dead_tmp(tmp3); + tcg_temp_free_i32(tmp3); break; case 6: /* VBIT */ tmp3 = neon_load_reg(rd, pass); gen_neon_bsl(tmp, tmp, tmp3, tmp2); - dead_tmp(tmp3); + tcg_temp_free_i32(tmp3); break; case 7: /* VBIF */ tmp3 = neon_load_reg(rd, pass); gen_neon_bsl(tmp, tmp3, tmp, tmp2); - dead_tmp(tmp3); + tcg_temp_free_i32(tmp3); break; } break; - case 4: /* VHSUB */ + case NEON_3R_VHSUB: GEN_NEON_INTEGER_OP(hsub); break; - case 5: /* VQSUB */ + case NEON_3R_VQSUB: GEN_NEON_INTEGER_OP_ENV(qsub); break; - case 6: /* VCGT */ + case NEON_3R_VCGT: GEN_NEON_INTEGER_OP(cgt); break; - case 7: /* VCGE */ + case NEON_3R_VCGE: GEN_NEON_INTEGER_OP(cge); break; - case 8: /* VSHL */ + case NEON_3R_VSHL: GEN_NEON_INTEGER_OP(shl); break; - case 9: /* VQSHL */ + case NEON_3R_VQSHL: GEN_NEON_INTEGER_OP_ENV(qshl); break; - case 10: /* VRSHL */ + case NEON_3R_VRSHL: GEN_NEON_INTEGER_OP(rshl); break; - case 11: /* VQRSHL */ + case NEON_3R_VQRSHL: GEN_NEON_INTEGER_OP_ENV(qrshl); break; - case 12: /* VMAX */ + case NEON_3R_VMAX: GEN_NEON_INTEGER_OP(max); break; - case 13: /* VMIN */ + case NEON_3R_VMIN: GEN_NEON_INTEGER_OP(min); break; - case 14: /* VABD */ + case NEON_3R_VABD: GEN_NEON_INTEGER_OP(abd); break; - case 15: /* VABA */ + case NEON_3R_VABA: GEN_NEON_INTEGER_OP(abd); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tmp2 = neon_load_reg(rd, pass); gen_neon_add(size, tmp, tmp2); break; - case 16: + case NEON_3R_VADD_VSUB: if (!u) { /* VADD */ - if (gen_neon_add(size, tmp, tmp2)) - return 1; + gen_neon_add(size, tmp, tmp2); } else { /* VSUB */ switch (size) { case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break; case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } } break; - case 17: + case NEON_3R_VTST_VCEQ: if (!u) { /* VTST */ switch (size) { case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } } else { /* VCEQ */ switch (size) { case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } } break; - case 18: /* Multiply. 
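/* Illustrative sketch: VBSL, VBIT and VBIF above share one bitwise
 * select kernel and differ only in which of the three registers
 * supplies the mask, which is why tmp/tmp2/tmp3 are permuted across
 * the three cases.
 */
#include <stdint.h>

static uint32_t bit_select(uint32_t mask, uint32_t a, uint32_t b)
{
    /* Take bits from 'a' where mask is 1, from 'b' where it is 0. */
    return (a & mask) | (b & ~mask);
}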
*/ + case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */ switch (size) { case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tmp2 = neon_load_reg(rd, pass); if (u) { /* VMLS */ gen_neon_rsb(size, tmp, tmp2); @@ -4469,7 +4853,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_neon_add(size, tmp, tmp2); } break; - case 19: /* VMUL */ + case NEON_3R_VMUL: if (u) { /* polynomial */ gen_helper_neon_mul_p8(tmp, tmp, tmp2); } else { /* Integer */ @@ -4477,96 +4861,121 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } } break; - case 20: /* VPMAX */ + case NEON_3R_VPMAX: GEN_NEON_INTEGER_OP(pmax); break; - case 21: /* VPMIN */ + case NEON_3R_VPMIN: GEN_NEON_INTEGER_OP(pmin); break; - case 22: /* Hultiply high. */ + case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */ if (!u) { /* VQDMULH */ switch (size) { - case 1: gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); break; - case 2: gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); break; - default: return 1; + case 1: + gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); + break; + case 2: + gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); + break; + default: abort(); } - } else { /* VQRDHMUL */ + } else { /* VQRDMULH */ switch (size) { - case 1: gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); break; - case 2: gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); break; - default: return 1; + case 1: + gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); + break; + case 2: + gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); + break; + default: abort(); } } break; - case 23: /* VPADD */ - if (u) - return 1; + case NEON_3R_VPADD: switch (size) { case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break; case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } break; - case 26: /* Floating point arithnetic. */ + case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */ + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); switch ((u << 2) | size) { case 0: /* VADD */ - gen_helper_neon_add_f32(tmp, tmp, tmp2); + case 4: /* VPADD */ + gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); break; case 2: /* VSUB */ - gen_helper_neon_sub_f32(tmp, tmp, tmp2); - break; - case 4: /* VPADD */ - gen_helper_neon_add_f32(tmp, tmp, tmp2); + gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus); break; case 6: /* VABD */ - gen_helper_neon_abd_f32(tmp, tmp, tmp2); + gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus); break; default: - return 1; + abort(); } + tcg_temp_free_ptr(fpstatus); break; - case 27: /* Float multiply. 
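/* Illustrative sketch of the lane semantics behind the qdmulh_s16
 * helper used above: double the product and keep the high half.
 * Only INT16_MIN * INT16_MIN saturates (the real helper also sets QC;
 * 'qc' is an assumed stand-in).
 */
#include <stdint.h>

static int16_t vqdmulh_lane_s16(int16_t a, int16_t b, int *qc)
{
    if (a == INT16_MIN && b == INT16_MIN) {
        *qc = 1;                 /* 2 * 2^30 does not fit in 32 bits */
        return INT16_MAX;
    }
    int32_t p = 2 * (int32_t)a * (int32_t)b; /* fits for all other inputs */
    return (int16_t)(p >> 16);
}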
*/ - gen_helper_neon_mul_f32(tmp, tmp, tmp2); + } + case NEON_3R_FLOAT_MULTIPLY: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus); if (!u) { - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tmp2 = neon_load_reg(rd, pass); if (size == 0) { - gen_helper_neon_add_f32(tmp, tmp, tmp2); + gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); } else { - gen_helper_neon_sub_f32(tmp, tmp2, tmp); + gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus); } } + tcg_temp_free_ptr(fpstatus); break; - case 28: /* Float compare. */ + } + case NEON_3R_FLOAT_CMP: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); if (!u) { - gen_helper_neon_ceq_f32(tmp, tmp, tmp2); + gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus); } else { - if (size == 0) - gen_helper_neon_cge_f32(tmp, tmp, tmp2); - else - gen_helper_neon_cgt_f32(tmp, tmp, tmp2); + if (size == 0) { + gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus); + } else { + gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus); + } } + tcg_temp_free_ptr(fpstatus); break; - case 29: /* Float compare absolute. */ - if (!u) - return 1; - if (size == 0) - gen_helper_neon_acge_f32(tmp, tmp, tmp2); - else - gen_helper_neon_acgt_f32(tmp, tmp, tmp2); + } + case NEON_3R_FLOAT_ACMP: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + if (size == 0) { + gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus); + } else { + gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus); + } + tcg_temp_free_ptr(fpstatus); break; - case 30: /* Float min/max. */ - if (size == 0) - gen_helper_neon_max_f32(tmp, tmp, tmp2); - else - gen_helper_neon_min_f32(tmp, tmp, tmp2); + } + case NEON_3R_FLOAT_MINMAX: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + if (size == 0) { + gen_helper_neon_max_f32(tmp, tmp, tmp2, fpstatus); + } else { + gen_helper_neon_min_f32(tmp, tmp, tmp2, fpstatus); + } + tcg_temp_free_ptr(fpstatus); break; - case 31: + } + case NEON_3R_VRECPS_VRSQRTS: if (size == 0) gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); else @@ -4575,7 +4984,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) default: abort(); } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); /* Save the result. For elementwise operations we can put it straight into the destination register. For pairwise operations @@ -4599,7 +5008,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Two registers and shift. */ op = (insn >> 8) & 0xf; if (insn & (1 << 7)) { - /* 64-bit shift. */ + /* 64-bit shift. */ + if (op > 7) { + return 1; + } size = 3; } else { size = 2; @@ -4612,6 +5024,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (op < 8) { /* Shift by immediate: VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ + if (q && ((rd | rm) & 1)) { + return 1; + } + if (!u && (op == 4 || op == 6)) { + return 1; + } /* Right shifts are encoded as N - shift, where N is the element size in bits. 
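/* Illustrative sketch of the shift-count convention established just
 * below: right shifts become negative left-shift counts, so a single
 * shl helper serves both directions (shown for 32-bit unsigned lanes).
 */
#include <stdint.h>

static uint32_t shl_by_signed_count_u32(uint32_t x, int shift)
{
    /* shift in [-32, 31]; negative means shift right by -shift */
    if (shift >= 0) {
        return (shift > 31) ? 0 : (x << shift);
    }
    return (-shift > 31) ? 0 : (x >> -shift);
}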
*/ if (op <= 4) @@ -4659,20 +5077,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1); break; case 4: /* VSRI */ - if (!u) - return 1; - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - break; case 5: /* VSHL, VSLI */ gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); break; case 6: /* VQSHLU */ - if (u) { - gen_helper_neon_qshlu_s64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } else { - return 1; - } + gen_helper_neon_qshlu_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); break; case 7: /* VQSHL */ if (u) { @@ -4686,17 +5096,29 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } if (op == 1 || op == 3) { /* Accumulate. */ - neon_load_reg64(cpu_V0, rd + pass); + neon_load_reg64(cpu_V1, rd + pass); tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1); } else if (op == 4 || (op == 5 && u)) { /* Insert */ - cpu_abort(env, "VS[LR]I.64 not implemented"); + neon_load_reg64(cpu_V1, rd + pass); + uint64_t mask; + if (shift < -63 || shift > 63) { + mask = 0; + } else { + if (op == 4) { + mask = 0xffffffffffffffffull >> -shift; + } else { + mask = 0xffffffffffffffffull << shift; + } + } + tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask); + tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); } neon_store_reg64(cpu_V0, rd + pass); } else { /* size < 3 */ /* Operands in T0 and T1. */ tmp = neon_load_reg(rm, pass); - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, imm); switch (op) { case 0: /* VSHR */ @@ -4708,22 +5130,15 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) GEN_NEON_INTEGER_OP(rshl); break; case 4: /* VSRI */ - if (!u) - return 1; - GEN_NEON_INTEGER_OP(shl); - break; case 5: /* VSHL, VSLI */ switch (size) { case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } break; case 6: /* VQSHLU */ - if (!u) { - return 1; - } switch (size) { case 0: gen_helper_neon_qshlu_s8(tmp, cpu_env, @@ -4738,20 +5153,20 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp, tmp2); break; default: - return 1; + abort(); } break; case 7: /* VQSHL */ GEN_NEON_INTEGER_OP_ENV(qshl); break; } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (op == 1 || op == 3) { /* Accumulate. */ tmp2 = neon_load_reg(rd, pass); - gen_neon_add(size, tmp2, tmp); - dead_tmp(tmp2); + gen_neon_add(size, tmp, tmp2); + tcg_temp_free_i32(tmp2); } else if (op == 4 || (op == 5 && u)) { /* Insert */ switch (size) { @@ -4787,7 +5202,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_andi_i32(tmp, tmp, mask); tcg_gen_andi_i32(tmp2, tmp2, ~mask); tcg_gen_or_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } neon_store_reg(rd, pass, tmp); } @@ -4795,71 +5210,81 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } else if (op < 10) { /* Shift by immediate and narrow: VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ + int input_unsigned = (op == 8) ? 
!u : u; + if (rm & 1) { + return 1; + } shift = shift - (1 << (size + 3)); size++; - switch (size) { - case 1: - imm = (uint16_t)shift; - imm |= imm << 16; - tmp2 = tcg_const_i32(imm); - TCGV_UNUSED_I64(tmp64); - break; - case 2: - imm = (uint32_t)shift; - tmp2 = tcg_const_i32(imm); - TCGV_UNUSED_I64(tmp64); - break; - case 3: + if (size == 3) { tmp64 = tcg_const_i64(shift); - TCGV_UNUSED(tmp2); - break; - default: - abort(); - } - - for (pass = 0; pass < 2; pass++) { - if (size == 3) { - neon_load_reg64(cpu_V0, rm + pass); + neon_load_reg64(cpu_V0, rm); + neon_load_reg64(cpu_V1, rm + 1); + for (pass = 0; pass < 2; pass++) { + TCGv_i64 in; + if (pass == 0) { + in = cpu_V0; + } else { + in = cpu_V1; + } if (q) { - if (u) - gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64); - else - gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64); + if (input_unsigned) { + gen_helper_neon_rshl_u64(cpu_V0, in, tmp64); + } else { + gen_helper_neon_rshl_s64(cpu_V0, in, tmp64); + } } else { - if (u) - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64); - else - gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64); + if (input_unsigned) { + gen_helper_neon_shl_u64(cpu_V0, in, tmp64); + } else { + gen_helper_neon_shl_s64(cpu_V0, in, tmp64); + } } - } else { - tmp = neon_load_reg(rm + pass, 0); - gen_neon_shift_narrow(size, tmp, tmp2, q, u); - tmp3 = neon_load_reg(rm + pass, 1); - gen_neon_shift_narrow(size, tmp3, tmp2, q, u); - tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3); - dead_tmp(tmp); - dead_tmp(tmp3); - } - tmp = new_tmp(); - if (op == 8 && !u) { - gen_neon_narrow(size - 1, tmp, cpu_V0); - } else { - if (op == 8) - gen_neon_narrow_sats(size - 1, tmp, cpu_V0); - else - gen_neon_narrow_satu(size - 1, tmp, cpu_V0); - } - neon_store_reg(rd, pass, tmp); - } /* for pass */ - if (size == 3) { + tmp = tcg_temp_new_i32(); + gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0); + neon_store_reg(rd, pass, tmp); + } /* for pass */ tcg_temp_free_i64(tmp64); } else { + if (size == 1) { + imm = (uint16_t)shift; + imm |= imm << 16; + } else { + /* size == 2 */ + imm = (uint32_t)shift; + } + tmp2 = tcg_const_i32(imm); + tmp4 = neon_load_reg(rm + 1, 0); + tmp5 = neon_load_reg(rm + 1, 1); + for (pass = 0; pass < 2; pass++) { + if (pass == 0) { + tmp = neon_load_reg(rm, 0); + } else { + tmp = tmp4; + } + gen_neon_shift_narrow(size, tmp, tmp2, q, + input_unsigned); + if (pass == 0) { + tmp3 = neon_load_reg(rm, 1); + } else { + tmp3 = tmp5; + } + gen_neon_shift_narrow(size, tmp3, tmp2, q, + input_unsigned); + tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp3); + tmp = tcg_temp_new_i32(); + gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0); + neon_store_reg(rd, pass, tmp); + } /* for pass */ tcg_temp_free_i32(tmp2); } } else if (op == 10) { - /* VSHLL */ - if (q || size == 3) + /* VSHLL, VMOVL */ + if (q || (rd & 1)) { return 1; + } tmp = neon_load_reg(rm, 0); tmp2 = neon_load_reg(rm, 1); for (pass = 0; pass < 2; pass++) { @@ -4872,22 +5297,37 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* The shift is less than the width of the source type, so we can just shift the whole register. */ tcg_gen_shli_i64(cpu_V0, cpu_V0, shift); + /* Widen the result of shift: we need to clear + * the potential overflow bits resulting from + * left bits of the narrow input appearing as + * right bits of the left neighbour narrow + * input. 
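/* Illustrative sketch of what one pass of the rewritten loop above
 * computes for size == 2 (32-bit lanes narrowed to 16), ignoring the
 * rounding and saturating variants; shift is assumed in [1, 16].
 */
#include <stdint.h>

static uint32_t vshrn_pass_u32(uint32_t lo, uint32_t hi, int shift)
{
    uint16_t a = (uint16_t)(lo >> shift); /* narrow-shift of tmp */
    uint16_t b = (uint16_t)(hi >> shift); /* narrow-shift of tmp3 */
    return (uint32_t)a | ((uint32_t)b << 16); /* the concat + narrow */
}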
*/ if (size < 2 || !u) { uint64_t imm64; if (size == 0) { imm = (0xffu >> (8 - shift)); imm |= imm << 16; - } else { + } else if (size == 1) { imm = 0xffff >> (16 - shift); + } else { + /* size == 2 */ + imm = 0xffffffff >> (32 - shift); + } + if (size < 2) { + imm64 = imm | (((uint64_t)imm) << 32); + } else { + imm64 = imm; } - imm64 = imm | (((uint64_t)imm) << 32); - tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64); + tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64); } } neon_store_reg64(cpu_V0, rd + pass); } } else if (op >= 14) { /* VCVT fixed-point. */ + if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) { + return 1; + } /* We have already masked out the must-be-1 top bit of imm6, * hence this 32-shift where the ARM ARM has 64-imm6. */ @@ -4896,14 +5336,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass)); if (!(op & 1)) { if (u) - gen_vfp_ulto(0, shift); + gen_vfp_ulto(0, shift, 1); else - gen_vfp_slto(0, shift); + gen_vfp_slto(0, shift, 1); } else { if (u) - gen_vfp_toul(0, shift); + gen_vfp_toul(0, shift, 1); else - gen_vfp_tosl(0, shift); + gen_vfp_tosl(0, shift, 1); } tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass)); } @@ -4912,11 +5352,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } else { /* (insn & 0x00380080) == 0 */ int invert; + if (q && (rd & 1)) { + return 1; + } op = (insn >> 8) & 0xf; /* One register and immediate. */ imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf); invert = (insn & (1 << 5)) != 0; + /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. + * We choose to not special-case this and will behave as if a + * valid constant encoding of 0 had been given. + */ switch (op) { case 0: case 1: /* no-op */ @@ -4948,6 +5395,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) imm = ~imm; break; case 15: + if (invert) { + return 1; + } imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); break; @@ -4967,8 +5417,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } else { /* VMOV, VMVN. 
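/* Illustrative sketch of the mask built above for the widening shift,
 * size == 0 (8-bit lanes widened to 16) and 0 < shift < 8.  After the
 * whole-register shl, the low 'shift' bits of each 16-bit lane hold
 * bits spilled from the lane to its right; and-ing with ~mask clears
 * them.  E.g. shift == 3 yields 0x0007000700070007.
 */
#include <stdint.h>

static uint64_t vshll_spill_mask8(int shift)
{
    uint32_t imm = 0xffu >> (8 - shift); /* low 'shift' bits of one lane */
    imm |= imm << 16;                    /* lanes 0 and 1 */
    return imm | ((uint64_t)imm << 32);  /* all four 16-bit lanes */
}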
*/ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); if (op == 14 && invert) { + int n; uint32_t val; val = 0; for (n = 0; n < 4; n++) { @@ -4991,31 +5442,47 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) int src1_wide; int src2_wide; int prewiden; - /* prewiden, src1_wide, src2_wide */ - static const int neon_3reg_wide[16][3] = { - {1, 0, 0}, /* VADDL */ - {1, 1, 0}, /* VADDW */ - {1, 0, 0}, /* VSUBL */ - {1, 1, 0}, /* VSUBW */ - {0, 1, 1}, /* VADDHN */ - {0, 0, 0}, /* VABAL */ - {0, 1, 1}, /* VSUBHN */ - {0, 0, 0}, /* VABDL */ - {0, 0, 0}, /* VMLAL */ - {0, 0, 0}, /* VQDMLAL */ - {0, 0, 0}, /* VMLSL */ - {0, 0, 0}, /* VQDMLSL */ - {0, 0, 0}, /* Integer VMULL */ - {0, 0, 0}, /* VQDMULL */ - {0, 0, 0} /* Polynomial VMULL */ + /* undefreq: bit 0 : UNDEF if size != 0 + * bit 1 : UNDEF if size == 0 + * bit 2 : UNDEF if U == 1 + * Note that [1:0] set implies 'always UNDEF' + */ + int undefreq; + /* prewiden, src1_wide, src2_wide, undefreq */ + static const int neon_3reg_wide[16][4] = { + {1, 0, 0, 0}, /* VADDL */ + {1, 1, 0, 0}, /* VADDW */ + {1, 0, 0, 0}, /* VSUBL */ + {1, 1, 0, 0}, /* VSUBW */ + {0, 1, 1, 0}, /* VADDHN */ + {0, 0, 0, 0}, /* VABAL */ + {0, 1, 1, 0}, /* VSUBHN */ + {0, 0, 0, 0}, /* VABDL */ + {0, 0, 0, 0}, /* VMLAL */ + {0, 0, 0, 6}, /* VQDMLAL */ + {0, 0, 0, 0}, /* VMLSL */ + {0, 0, 0, 6}, /* VQDMLSL */ + {0, 0, 0, 0}, /* Integer VMULL */ + {0, 0, 0, 2}, /* VQDMULL */ + {0, 0, 0, 5}, /* Polynomial VMULL */ + {0, 0, 0, 3}, /* Reserved: always UNDEF */ }; prewiden = neon_3reg_wide[op][0]; src1_wide = neon_3reg_wide[op][1]; src2_wide = neon_3reg_wide[op][2]; + undefreq = neon_3reg_wide[op][3]; - if (size == 0 && (op == 9 || op == 11 || op == 13)) + if (((undefreq & 1) && (size != 0)) || + ((undefreq & 2) && (size == 0)) || + ((undefreq & 4) && u)) { + return 1; + } + if ((src1_wide && (rn & 1)) || + (src2_wide && (rm & 1)) || + (!src2_wide && (rd & 1))) { return 1; + } /* Avoid overlapping operands. Wide source operands are always aligned so will never overlap with wide @@ -5084,48 +5551,49 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) break; default: abort(); } - dead_tmp(tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); break; case 8: case 9: case 10: case 11: case 12: case 13: /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ gen_neon_mull(cpu_V0, tmp, tmp2, size, u); break; case 14: /* Polynomial VMULL */ - cpu_abort(env, "Polynomial VMULL not implemented"); - - default: /* 15 is RESERVED. */ - return 1; + gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + break; + default: /* 15 is RESERVED: caught earlier */ + abort(); } - if (op == 5 || op == 13 || (op >= 8 && op <= 11)) { + if (op == 13) { + /* VQDMULL */ + gen_neon_addl_saturate(cpu_V0, cpu_V0, size); + neon_store_reg64(cpu_V0, rd + pass); + } else if (op == 5 || (op >= 8 && op <= 11)) { /* Accumulate. 
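/* Illustrative sketch of a {0, 1, 1, 0} row in the table above
 * (VADDHN with 64-bit wide elements): both sources are wide and the
 * result keeps only the high half of each sum.
 */
#include <stdint.h>

static uint32_t vaddhn_elem_u64(uint64_t a, uint64_t b)
{
    return (uint32_t)((a + b) >> 32);
}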
*/ - if (op == 10 || op == 11) { - gen_neon_negl(cpu_V0, size); - } - - if (op != 13) { - neon_load_reg64(cpu_V1, rd + pass); - } - + neon_load_reg64(cpu_V1, rd + pass); switch (op) { - case 5: case 8: case 10: /* VABAL, VMLAL, VMLSL */ + case 10: /* VMLSL */ + gen_neon_negl(cpu_V0, size); + /* Fall through */ + case 5: case 8: /* VABAL, VMLAL */ gen_neon_addl(size); break; case 9: case 11: /* VQDMLAL, VQDMLSL */ gen_neon_addl_saturate(cpu_V0, cpu_V0, size); + if (op == 11) { + gen_neon_negl(cpu_V0, size); + } gen_neon_addl_saturate(cpu_V0, cpu_V1, size); break; - /* Fall through. */ - case 13: /* VQDMULL */ - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); - break; default: abort(); } neon_store_reg64(cpu_V0, rd + pass); } else if (op == 4 || op == 6) { /* Narrowing operation. */ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); if (!u) { switch (size) { case 0: @@ -5168,16 +5636,29 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } } else { - /* Two registers and a scalar. */ + /* Two registers and a scalar. NB that for ops of this form + * the ARM ARM labels bit 24 as Q, but it is in our variable + * 'u', not 'q'. + */ + if (size == 0) { + return 1; + } switch (op) { - case 0: /* Integer VMLA scalar */ case 1: /* Float VMLA scalar */ - case 4: /* Integer VMLS scalar */ case 5: /* Floating point VMLS scalar */ - case 8: /* Integer VMUL scalar */ case 9: /* Floating point VMUL scalar */ + if (size == 1) { + return 1; + } + /* fall through */ + case 0: /* Integer VMLA scalar */ + case 4: /* Integer VMLS scalar */ + case 8: /* Integer VMUL scalar */ case 12: /* VQDMULH scalar */ case 13: /* VQRDMULH scalar */ + if (u && ((rd | rn) & 1)) { + return 1; + } tmp = neon_get_scalar(size, rm); neon_store_scratch(0, tmp); for (pass = 0; pass < (u ? 4 : 2); pass++) { @@ -5196,16 +5677,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); } } else if (op & 1) { - gen_helper_neon_mul_f32(tmp, tmp, tmp2); + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus); + tcg_temp_free_ptr(fpstatus); } else { switch (size) { case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (op < 8) { /* Accumulate. 
*/ tmp2 = neon_load_reg(rd, pass); switch (op) { case 0: gen_neon_add(size, tmp, tmp2); break; case 1: - gen_helper_neon_add_f32(tmp, tmp, tmp2); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); + tcg_temp_free_ptr(fpstatus); break; + } case 4: gen_neon_rsb(size, tmp, tmp2); break; case 5: - gen_helper_neon_sub_f32(tmp, tmp2, tmp); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus); + tcg_temp_free_ptr(fpstatus); break; + } default: abort(); } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } neon_store_reg(rd, pass, tmp); } break; - case 2: /* VMLAL sclar */ case 3: /* VQDMLAL scalar */ - case 6: /* VMLSL scalar */ case 7: /* VQDMLSL scalar */ - case 10: /* VMULL scalar */ case 11: /* VQDMULL scalar */ - if (size == 0 && (op == 3 || op == 7 || op == 11)) + if (u == 1) { return 1; - + } + /* fall through */ + case 2: /* VMLAL scalar */ + case 6: /* VMLSL scalar */ + case 10: /* VMULL scalar */ + if (rd & 1) { + return 1; + } tmp2 = neon_get_scalar(size, rm); /* We need a copy of tmp2 because gen_neon_mull * deletes it during pass 0. */ - tmp4 = new_tmp(); + tmp4 = tcg_temp_new_i32(); tcg_gen_mov_i32(tmp4, tmp2); tmp3 = neon_load_reg(rn, 1); @@ -5254,18 +5749,21 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp2 = tmp4; } gen_neon_mull(cpu_V0, tmp, tmp2, size, u); - if (op == 6 || op == 7) { - gen_neon_negl(cpu_V0, size); - } if (op != 11) { neon_load_reg64(cpu_V1, rd + pass); } switch (op) { - case 2: case 6: + case 6: + gen_neon_negl(cpu_V0, size); + /* Fall through */ + case 2: gen_neon_addl(size); break; case 3: case 7: gen_neon_addl_saturate(cpu_V0, cpu_V0, size); + if (op == 7) { + gen_neon_negl(cpu_V0, size); + } gen_neon_addl_saturate(cpu_V0, cpu_V1, size); break; case 10: @@ -5294,6 +5792,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (imm > 7 && !q) return 1; + if (q && ((rd | rn | rm) & 1)) { + return 1; + } + if (imm == 0) { neon_load_reg64(cpu_V0, rn); if (q) { @@ -5342,10 +5844,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Two register misc. */ op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); size = (insn >> 18) & 3; + /* UNDEF for unknown op values and bad op-size combinations */ + if ((neon_2rm_sizes[op] & (1 << size)) == 0) { + return 1; + } + if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) && + q && ((rm | rd) & 1)) { + return 1; + } switch (op) { - case 0: /* VREV64 */ - if (size == 3) - return 1; + case NEON_2RM_VREV64: for (pass = 0; pass < (q ? 2 : 1); pass++) { tmp = neon_load_reg(rm, pass * 2); tmp2 = neon_load_reg(rm, pass * 2 + 1); @@ -5368,10 +5876,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } break; - case 4: case 5: /* VPADDL */ - case 12: case 13: /* VPADAL */ - if (size == 3) - return 1; + case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U: + case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U: for (pass = 0; pass < q + 1; pass++) { tmp = neon_load_reg(rm, pass * 2); gen_neon_widen(cpu_V0, tmp, size, op & 1); @@ -5383,7 +5889,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 2: tcg_gen_add_i64(CPU_V001); break; default: abort(); } - if (op >= 12) { + if (op >= NEON_2RM_VPADAL) { /* Accumulate. 
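/* Illustrative sketch of the by-scalar form handled above: one lane
 * of Rm is duplicated and applied to every lane of Rn (VMLA.I16 by
 * scalar shown, wrapping arithmetic).
 */
#include <stdint.h>

static void vmla_scalar_u16(uint16_t d[4], const uint16_t n[4], uint16_t s)
{
    for (int i = 0; i < 4; i++) {
        d[i] = (uint16_t)(d[i] + (uint32_t)n[i] * s);
    }
}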
*/ neon_load_reg64(cpu_V1, rd + pass); gen_neon_addl(size); @@ -5391,8 +5897,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) neon_store_reg64(cpu_V0, rd + pass); } break; - case 33: /* VTRN */ + case NEON_2RM_VTRN: if (size == 2) { + int n; for (n = 0; n < (q ? 4 : 2); n += 2) { tmp = neon_load_reg(rm, n); tmp2 = neon_load_reg(rd, n + 1); @@ -5403,73 +5910,27 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) goto elementwise; } break; - case 34: /* VUZP */ - /* Reg Before After - Rd A3 A2 A1 A0 B2 B0 A2 A0 - Rm B3 B2 B1 B0 B3 B1 A3 A1 - */ - if (size == 3) + case NEON_2RM_VUZP: + if (gen_neon_unzip(rd, rm, size, q)) { return 1; - gen_neon_unzip(rd, q, 0, size); - gen_neon_unzip(rm, q, 4, size); - if (q) { - static int unzip_order_q[8] = - {0, 2, 4, 6, 1, 3, 5, 7}; - for (n = 0; n < 8; n++) { - int reg = (n < 4) ? rd : rm; - tmp = neon_load_scratch(unzip_order_q[n]); - neon_store_reg(reg, n % 4, tmp); - } - } else { - static int unzip_order[4] = - {0, 4, 1, 5}; - for (n = 0; n < 4; n++) { - int reg = (n < 2) ? rd : rm; - tmp = neon_load_scratch(unzip_order[n]); - neon_store_reg(reg, n % 2, tmp); - } } break; - case 35: /* VZIP */ - /* Reg Before After - Rd A3 A2 A1 A0 B1 A1 B0 A0 - Rm B3 B2 B1 B0 B3 A3 B2 A2 - */ - if (size == 3) + case NEON_2RM_VZIP: + if (gen_neon_zip(rd, rm, size, q)) { return 1; - count = (q ? 4 : 2); - for (n = 0; n < count; n++) { - tmp = neon_load_reg(rd, n); - tmp2 = neon_load_reg(rd, n); - switch (size) { - case 0: gen_neon_zip_u8(tmp, tmp2); break; - case 1: gen_neon_zip_u16(tmp, tmp2); break; - case 2: /* no-op */; break; - default: abort(); - } - neon_store_scratch(n * 2, tmp); - neon_store_scratch(n * 2 + 1, tmp2); - } - for (n = 0; n < count * 2; n++) { - int reg = (n < count) ? 
rd : rm; - tmp = neon_load_scratch(n); - neon_store_reg(reg, n % count, tmp); } break; - case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */ - if (size == 3) + case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN: + /* also VQMOVUN; op field and mnemonics don't line up */ + if (rm & 1) { return 1; + } TCGV_UNUSED(tmp2); for (pass = 0; pass < 2; pass++) { neon_load_reg64(cpu_V0, rm + pass); - tmp = new_tmp(); - if (op == 36 && q == 0) { - gen_neon_narrow(size, tmp, cpu_V0); - } else if (q) { - gen_neon_narrow_satu(size, tmp, cpu_V0); - } else { - gen_neon_narrow_sats(size, tmp, cpu_V0); - } + tmp = tcg_temp_new_i32(); + gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size, + tmp, cpu_V0); if (pass == 0) { tmp2 = tmp; } else { @@ -5478,9 +5939,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } break; - case 38: /* VSHLL */ - if (q || size == 3) + case NEON_2RM_VSHLL: + if (q || (rd & 1)) { return 1; + } tmp = neon_load_reg(rm, 0); tmp2 = neon_load_reg(rm, 1); for (pass = 0; pass < 2; pass++) { @@ -5491,54 +5953,58 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) neon_store_reg64(cpu_V0, rd + pass); } break; - case 44: /* VCVT.F16.F32 */ - if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) - return 1; - tmp = new_tmp(); - tmp2 = new_tmp(); + case NEON_2RM_VCVT_F16_F32: + if (!arm_feature(env, ARM_FEATURE_VFP_FP16) || + q || (rm & 1)) { + return 1; + } + tmp = tcg_temp_new_i32(); + tmp2 = tcg_temp_new_i32(); tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0)); - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1)); - gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); + gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); tcg_gen_shli_i32(tmp2, tmp2, 16); tcg_gen_or_i32(tmp2, tmp2, tmp); tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2)); - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3)); neon_store_reg(rd, 0, tmp2); - tmp2 = new_tmp(); - gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); + tmp2 = tcg_temp_new_i32(); + gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); tcg_gen_shli_i32(tmp2, tmp2, 16); tcg_gen_or_i32(tmp2, tmp2, tmp); neon_store_reg(rd, 1, tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; - case 46: /* VCVT.F32.F16 */ - if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) - return 1; - tmp3 = new_tmp(); + case NEON_2RM_VCVT_F32_F16: + if (!arm_feature(env, ARM_FEATURE_VFP_FP16) || + q || (rd & 1)) { + return 1; + } + tmp3 = tcg_temp_new_i32(); tmp = neon_load_reg(rm, 0); tmp2 = neon_load_reg(rm, 1); tcg_gen_ext16u_i32(tmp3, tmp); - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0)); tcg_gen_shri_i32(tmp3, tmp, 16); - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1)); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tcg_gen_ext16u_i32(tmp3, tmp2); - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2)); tcg_gen_shri_i32(tmp3, tmp2, 16); - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + 
gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3)); - dead_tmp(tmp2); - dead_tmp(tmp3); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); break; default: elementwise: for (pass = 0; pass < (q ? 4 : 2); pass++) { - if (op == 30 || op == 31 || op >= 58) { + if (neon_2rm_is_float_op(op)) { tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass)); TCGV_UNUSED(tmp); @@ -5546,177 +6012,210 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp = neon_load_reg(rm, pass); } switch (op) { - case 1: /* VREV32 */ + case NEON_2RM_VREV32: switch (size) { case 0: tcg_gen_bswap32_i32(tmp, tmp); break; case 1: gen_swap_half(tmp); break; - default: return 1; + default: abort(); } break; - case 2: /* VREV16 */ - if (size != 0) - return 1; + case NEON_2RM_VREV16: gen_rev16(tmp); break; - case 8: /* CLS */ + case NEON_2RM_VCLS: switch (size) { case 0: gen_helper_neon_cls_s8(tmp, tmp); break; case 1: gen_helper_neon_cls_s16(tmp, tmp); break; case 2: gen_helper_neon_cls_s32(tmp, tmp); break; - default: return 1; + default: abort(); } break; - case 9: /* CLZ */ + case NEON_2RM_VCLZ: switch (size) { case 0: gen_helper_neon_clz_u8(tmp, tmp); break; case 1: gen_helper_neon_clz_u16(tmp, tmp); break; case 2: gen_helper_clz(tmp, tmp); break; - default: return 1; + default: abort(); } break; - case 10: /* CNT */ - if (size != 0) - return 1; + case NEON_2RM_VCNT: gen_helper_neon_cnt_u8(tmp, tmp); break; - case 11: /* VNOT */ - if (size != 0) - return 1; + case NEON_2RM_VMVN: tcg_gen_not_i32(tmp, tmp); break; - case 14: /* VQABS */ + case NEON_2RM_VQABS: switch (size) { - case 0: gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); break; - case 1: gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); break; - case 2: gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); break; - default: return 1; + case 0: + gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); + break; + case 1: + gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); + break; + case 2: + gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); + break; + default: abort(); } break; - case 15: /* VQNEG */ + case NEON_2RM_VQNEG: switch (size) { - case 0: gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); break; - case 1: gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); break; - case 2: gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); break; - default: return 1; + case 0: + gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); + break; + case 1: + gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); + break; + case 2: + gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); + break; + default: abort(); } break; - case 16: case 19: /* VCGT #0, VCLE #0 */ + case NEON_2RM_VCGT0: case NEON_2RM_VCLE0: tmp2 = tcg_const_i32(0); switch(size) { case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } tcg_temp_free(tmp2); - if (op == 19) + if (op == NEON_2RM_VCLE0) { tcg_gen_not_i32(tmp, tmp); + } break; - case 17: case 20: /* VCGE #0, VCLT #0 */ + case NEON_2RM_VCGE0: case NEON_2RM_VCLT0: tmp2 = tcg_const_i32(0); switch(size) { case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } tcg_temp_free(tmp2); - if (op == 20) + if (op == NEON_2RM_VCLT0) { tcg_gen_not_i32(tmp, tmp); + } break; - case 18: /* VCEQ #0 */ + case NEON_2RM_VCEQ0: tmp2 = tcg_const_i32(0); 
switch(size) { case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); } tcg_temp_free(tmp2); break; - case 22: /* VABS */ + case NEON_2RM_VABS: switch(size) { case 0: gen_helper_neon_abs_s8(tmp, tmp); break; case 1: gen_helper_neon_abs_s16(tmp, tmp); break; case 2: tcg_gen_abs_i32(tmp, tmp); break; - default: return 1; + default: abort(); } break; - case 23: /* VNEG */ - if (size == 3) - return 1; + case NEON_2RM_VNEG: tmp2 = tcg_const_i32(0); gen_neon_rsb(size, tmp, tmp2); tcg_temp_free(tmp2); break; - case 24: case 27: /* Float VCGT #0, Float VCLE #0 */ + case NEON_2RM_VCGT0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); tmp2 = tcg_const_i32(0); - gen_helper_neon_cgt_f32(tmp, tmp, tmp2); + gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus); tcg_temp_free(tmp2); - if (op == 27) - tcg_gen_not_i32(tmp, tmp); + tcg_temp_free_ptr(fpstatus); break; - case 25: case 28: /* Float VCGE #0, Float VCLT #0 */ + } + case NEON_2RM_VCGE0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); tmp2 = tcg_const_i32(0); - gen_helper_neon_cge_f32(tmp, tmp, tmp2); + gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus); tcg_temp_free(tmp2); - if (op == 28) - tcg_gen_not_i32(tmp, tmp); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VCEQ0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + tmp2 = tcg_const_i32(0); + gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus); + tcg_temp_free(tmp2); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VCLE0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + tmp2 = tcg_const_i32(0); + gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus); + tcg_temp_free(tmp2); + tcg_temp_free_ptr(fpstatus); break; - case 26: /* Float VCEQ #0 */ + } + case NEON_2RM_VCLT0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); tmp2 = tcg_const_i32(0); - gen_helper_neon_ceq_f32(tmp, tmp, tmp2); + gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus); tcg_temp_free(tmp2); + tcg_temp_free_ptr(fpstatus); break; - case 30: /* Float VABS */ + } + case NEON_2RM_VABS_F: gen_vfp_abs(0); break; - case 31: /* Float VNEG */ + case NEON_2RM_VNEG_F: gen_vfp_neg(0); break; - case 32: /* VSWP */ + case NEON_2RM_VSWP: tmp2 = neon_load_reg(rd, pass); neon_store_reg(rm, pass, tmp2); break; - case 33: /* VTRN */ + case NEON_2RM_VTRN: tmp2 = neon_load_reg(rd, pass); switch (size) { case 0: gen_neon_trn_u8(tmp, tmp2); break; case 1: gen_neon_trn_u16(tmp, tmp2); break; - case 2: abort(); - default: return 1; + default: abort(); } neon_store_reg(rm, pass, tmp2); break; - case 56: /* Integer VRECPE */ + case NEON_2RM_VRECPE: gen_helper_recpe_u32(tmp, tmp, cpu_env); break; - case 57: /* Integer VRSQRTE */ + case NEON_2RM_VRSQRTE: gen_helper_rsqrte_u32(tmp, tmp, cpu_env); break; - case 58: /* Float VRECPE */ + case NEON_2RM_VRECPE_F: gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env); break; - case 59: /* Float VRSQRTE */ + case NEON_2RM_VRSQRTE_F: gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env); break; - case 60: /* VCVT.F32.S32 */ - gen_vfp_sito(0); + case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ + gen_vfp_sito(0, 1); break; - case 61: /* VCVT.F32.U32 */ - gen_vfp_uito(0); + case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */ + gen_vfp_uito(0, 1); break; - case 62: /* VCVT.S32.F32 */ - gen_vfp_tosiz(0); + case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */ + gen_vfp_tosiz(0, 1); break; - case 63: /* VCVT.U32.F32 */ - gen_vfp_touiz(0); + case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */ + 
gen_vfp_touiz(0, 1); break; default: - /* Reserved: 21, 29, 39-56 */ - return 1; + /* Reserved op values were caught by the + * neon_2rm_sizes[] check earlier. + */ + abort(); } - if (op == 30 || op == 31 || op >= 58) { + if (neon_2rm_is_float_op(op)) { tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass)); } else { @@ -5727,22 +6226,29 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } else if ((insn & (1 << 10)) == 0) { /* VTBL, VTBX. */ - n = ((insn >> 5) & 0x18) + 8; + int n = ((insn >> 8) & 3) + 1; + if ((rn + n) > 32) { + /* This is UNPREDICTABLE; we choose to UNDEF to avoid the + * helper function running off the end of the register file. + */ + return 1; + } + n <<= 3; if (insn & (1 << 6)) { tmp = neon_load_reg(rd, 0); } else { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } tmp2 = neon_load_reg(rm, 0); tmp4 = tcg_const_i32(rn); tmp5 = tcg_const_i32(n); gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); if (insn & (1 << 6)) { tmp = neon_load_reg(rd, 1); } else { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } tmp3 = neon_load_reg(rm, 1); @@ -5751,9 +6257,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp4); neon_store_reg(rd, 0, tmp2); neon_store_reg(rd, 1, tmp3); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else if ((insn & 0x380) == 0) { /* VDUP */ + if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { + return 1; + } if (insn & (1 << 19)) { tmp = neon_load_reg(rm, 1); } else { @@ -5768,11 +6277,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_neon_dup_low16(tmp); } for (pass = 0; pass < (q ? 4 : 2); pass++) { - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_mov_i32(tmp2, tmp); neon_store_reg(rd, pass, tmp2); } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else { return 1; } @@ -5790,6 +6299,34 @@ static int disas_cp14_read(CPUState * env, DisasContext *s, uint32_t insn) int rt = (insn >> 12) & 0xf; TCGv tmp; + /* Minimal set of debug registers, since we don't support debug */ + if (op1 == 0 && crn == 0 && op2 == 0) { + switch (crm) { + case 0: + /* DBGDIDR: just RAZ. In particular this means the + * "debug architecture version" bits will read as + * a reserved value, which should cause Linux to + * not try to use the debug hardware. + */ + tmp = tcg_const_i32(0); + store_reg(s, rt, tmp); + return 0; + case 1: + case 2: + /* DBGDRAR and DBGDSAR: v7 only. 
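/* Illustrative sketch of the table-lookup semantics behind the
 * gen_helper_neon_tbl call above, and why rn + n must stay inside the
 * register file: each index byte selects from an n-byte table, and an
 * out-of-range index yields 0 for VTBL or the old byte for VTBX.
 */
#include <stdint.h>

static uint8_t tbl_byte(const uint8_t *table, int table_len,
                        uint8_t index, uint8_t old_val, int is_vtbx)
{
    if (index < table_len) {
        return table[index];
    }
    return is_vtbx ? old_val : 0;
}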
Always RAZ since we + * don't implement memory mapped debug components + */ + if (ENABLE_ARCH_7) { + tmp = tcg_const_i32(0); + store_reg(s, rt, tmp); + return 0; + } + break; + default: + break; + } + } + if (arm_feature(env, ARM_FEATURE_THUMB2EE)) { if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) { /* TEECR */ @@ -5829,7 +6366,7 @@ static int disas_cp14_write(CPUState * env, DisasContext *s, uint32_t insn) return 1; tmp = load_reg(s, rt); gen_helper_set_teecr(cpu_env, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); return 0; } if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) { @@ -5890,10 +6427,10 @@ static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn) static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val) { TCGv tmp; - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_trunc_i64_i32(tmp, val); store_reg(s, rlow, tmp); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_shri_i64(val, val, 32); tcg_gen_trunc_i64_i32(tmp, val); store_reg(s, rhigh, tmp); @@ -5909,7 +6446,7 @@ static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow) tmp = tcg_temp_new_i64(); tmp2 = load_reg(s, rlow); tcg_gen_extu_i32_i64(tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_add_i64(val, val, tmp); tcg_temp_free_i64(tmp); } @@ -5926,8 +6463,8 @@ static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh) tmph = load_reg(s, rhigh); tmp = tcg_temp_new_i64(); tcg_gen_concat_i32_i64(tmp, tmpl, tmph); - dead_tmp(tmpl); - dead_tmp(tmph); + tcg_temp_free_i32(tmpl); + tcg_temp_free_i32(tmph); tcg_gen_add_i64(val, val, tmp); tcg_temp_free_i64(tmp); } @@ -5935,10 +6472,10 @@ static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh) /* Set N and Z flags from a 64-bit value. */ static void gen_logicq_cc(TCGv_i64 val) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); gen_helper_logicq_cc(tmp, val); gen_logic_CC(tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* Load/Store exclusive instructions are implemented by remembering @@ -5972,10 +6509,10 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, tcg_gen_mov_i32(cpu_exclusive_val, tmp); store_reg(s, rt, tmp); if (size == 3) { - TCGv tmp2 = new_tmp(); + TCGv tmp2 = tcg_temp_new_i32(); tcg_gen_addi_i32(tmp2, addr, 4); tmp = gen_ld32(tmp2, IS_USER(s)); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_mov_i32(cpu_exclusive_high, tmp); store_reg(s, rt2, tmp); } @@ -6028,14 +6565,14 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, abort(); } tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); if (size == 3) { - TCGv tmp2 = new_tmp(); + TCGv tmp2 = tcg_temp_new_i32(); tcg_gen_addi_i32(tmp2, addr, 4); tmp = gen_ld32(tmp2, IS_USER(s)); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_high, fail_label); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } tmp = load_reg(s, rt); switch (size) { @@ -6083,6 +6620,12 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) goto illegal_op; cond = insn >> 28; if (cond == 0xf){ + /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we + * choose to UNDEF. In ARMv5 and above the space is used + * for miscellaneous unconditional instructions. + */ + ARCH(5); + /* Unconditional instructions. */ if (((insn >> 25) & 7) == 1) { /* NEON Data processing. 
*/ @@ -6102,9 +6645,32 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) goto illegal_op; return; } - if ((insn & 0x0d70f000) == 0x0550f000) - return; /* PLD */ - else if ((insn & 0x0ffffdff) == 0x01010000) { + if (((insn & 0x0f30f000) == 0x0510f000) || + ((insn & 0x0f30f010) == 0x0710f000)) { + if ((insn & (1 << 22)) == 0) { + /* PLDW; v7MP */ + if (!arm_feature(env, ARM_FEATURE_V7MP)) { + goto illegal_op; + } + } + /* Otherwise PLD; v5TE+ */ + ARCH(5TE); + return; + } + if (((insn & 0x0f70f000) == 0x0450f000) || + ((insn & 0x0f70f010) == 0x0650f000)) { + ARCH(7); + return; /* PLI; V7 */ + } + if (((insn & 0x0f700000) == 0x04100000) || + ((insn & 0x0f700010) == 0x06100000)) { + if (!arm_feature(env, ARM_FEATURE_V7MP)) { + goto illegal_op; + } + return; /* v7MP: Unallocated memory hint: must NOP */ + } + + if ((insn & 0x0ffffdff) == 0x01010000) { ARCH(6); /* setend */ if (insn & (1 << 9)) { @@ -6134,7 +6700,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) goto illegal_op; ARCH(6); op1 = (insn & 0x1f); - addr = new_tmp(); + addr = tcg_temp_new_i32(); tmp = tcg_const_i32(op1); gen_helper_get_r13_banked(addr, cpu_env, tmp); tcg_temp_free_i32(tmp); @@ -6167,9 +6733,9 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tmp = tcg_const_i32(op1); gen_helper_set_r13_banked(cpu_env, tmp, addr); tcg_temp_free_i32(tmp); - dead_tmp(addr); + tcg_temp_free_i32(addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } return; } else if ((insn & 0x0e50ffe0) == 0x08100a00) { @@ -6207,7 +6773,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tcg_gen_addi_i32(addr, addr, offset); store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } gen_rfe(s, tmp, tmp2); return; @@ -6216,7 +6782,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) int32_t offset; val = (uint32_t)s->pc; - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, val); store_reg(s, 14, tmp); /* Sign-extend the 24-bit offset */ @@ -6225,6 +6791,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) val += (offset << 2) | ((insn >> 23) & 2) | 1; /* pipeline offset */ val += 4; + /* protected by ARCH(5); above, near the start of uncond block */ gen_bx_im(s, val); return; } else if ((insn & 0x0e000f00) == 0x0c000100) { @@ -6236,6 +6803,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } } else if ((insn & 0x0fe00000) == 0x0c400000) { /* Coprocessor double register transfer. */ + ARCH(5TE); } else if ((insn & 0x0f000010) == 0x0e000010) { /* Additional coprocessor register transfer. */ } else if ((insn & 0x0ff10020) == 0x01000000) { @@ -6280,7 +6848,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) val = ((insn >> 4) & 0xf000) | (insn & 0xfff); if ((insn & (1 << 22)) == 0) { /* MOVW */ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, val); } else { /* MOVT */ @@ -6327,7 +6895,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) goto illegal_op; tmp = load_cpu_field(spsr); } else { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); gen_helper_cpsr_read(tmp); } store_reg(s, rd, tmp); @@ -6336,10 +6904,12 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) case 0x1: if (op1 == 1) { /* branch/exchange thumb (bx). 
*/ + ARCH(4T); tmp = load_reg(s, rm); gen_bx(s, tmp); } else if (op1 == 3) { /* clz */ + ARCH(5); rd = (insn >> 12) & 0xf; tmp = load_reg(s, rm); gen_helper_clz(tmp, tmp); @@ -6362,14 +6932,16 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (op1 != 1) goto illegal_op; + ARCH(5); /* branch link/exchange thumb (blx) */ tmp = load_reg(s, rm); - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, s->pc); store_reg(s, 14, tmp2); gen_bx(s, tmp); break; case 0x5: /* saturating add/subtract */ + ARCH(5TE); rd = (insn >> 12) & 0xf; rn = (insn >> 16) & 0xf; tmp = load_reg(s, rm); @@ -6380,7 +6952,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) gen_helper_sub_saturate(tmp, tmp, tmp2); else gen_helper_add_saturate(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); break; case 7: @@ -6391,12 +6963,14 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) goto illegal_op; } /* bkpt */ + ARCH(5); gen_exception_insn(s, 4, EXCP_BKPT); break; case 0x8: /* signed multiply */ case 0xa: case 0xc: case 0xe: + ARCH(5TE); rs = (insn >> 8) & 0xf; rn = (insn >> 12) & 0xf; rd = (insn >> 16) & 0xf; @@ -6410,13 +6984,13 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) gen_sxth(tmp2); tmp64 = gen_muls_i64_i32(tmp, tmp2); tcg_gen_shri_i64(tmp64, tmp64, 16); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_trunc_i64_i32(tmp, tmp64); tcg_temp_free_i64(tmp64); if ((sh & 2) == 0) { tmp2 = load_reg(s, rn); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } store_reg(s, rd, tmp); } else { @@ -6424,11 +6998,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tmp = load_reg(s, rm); tmp2 = load_reg(s, rs); gen_mulxy(tmp, tmp2, sh & 2, sh & 4); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (op1 == 2) { tmp64 = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(tmp64, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_addq(s, tmp64, rn, rd); gen_storeq_reg(s, rn, rd, tmp64); tcg_temp_free_i64(tmp64); @@ -6436,7 +7010,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (op1 == 0) { tmp2 = load_reg(s, rn); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } store_reg(s, rd, tmp); } @@ -6462,7 +7036,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (shift) { val = (val >> shift) | (val << (32 - shift)); } - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, val); if (logic_cc && shift) { gen_set_CF_bit31(tmp2); @@ -6565,26 +7139,26 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tcg_gen_and_i32(tmp, tmp, tmp2); gen_logic_CC(tmp); } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; case 0x09: if (set_cc) { tcg_gen_xor_i32(tmp, tmp, tmp2); gen_logic_CC(tmp); } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; case 0x0a: if (set_cc) { gen_helper_sub_cc(tmp, tmp, tmp2); } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; case 0x0b: if (set_cc) { gen_helper_add_cc(tmp, tmp, tmp2); } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; case 0x0c: tcg_gen_or_i32(tmp, tmp, tmp2); @@ -6624,7 +7198,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) break; } if (op1 != 0x0f && op1 != 0x0d) { - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } } else { /* other instructions */ @@ -6647,18 +7221,18 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tmp = load_reg(s, rs); tmp2 = load_reg(s, rm); tcg_gen_mul_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if 
(insn & (1 << 22)) { /* Subtract (mls) */ ARCH(6T2); tmp2 = load_reg(s, rn); tcg_gen_sub_i32(tmp, tmp2, tmp); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } else if (insn & (1 << 21)) { /* Add */ tmp2 = load_reg(s, rn); tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } if (insn & (1 << 20)) gen_logic_CC(tmp); @@ -6762,7 +7336,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tmp2 = gen_ld32(addr, IS_USER(s)); gen_st32(tmp, addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); store_reg(s, rd, tmp2); } } @@ -6792,6 +7366,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } load = 1; } else if (sh & 2) { + ARCH(5TE); /* doubleword */ if (sh & 1) { /* store */ @@ -6829,7 +7404,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tcg_gen_addi_i32(addr, addr, address_offset); store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } if (load) { /* Complete the load. */ @@ -6858,7 +7433,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if ((op1 & 3) == 0 || sh == 5 || sh == 6) goto illegal_op; gen_arm_parallel_addsub(op1, sh, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); break; case 1: @@ -6882,7 +7457,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); } tcg_gen_or_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); } else if ((insn & 0x00200020) == 0x00200000) { /* [us]sat */ @@ -6918,16 +7493,16 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) /* Select bytes. */ tmp = load_reg(s, rn); tmp2 = load_reg(s, rm); - tmp3 = new_tmp(); + tmp3 = tcg_temp_new_i32(); tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE)); gen_helper_sel_flags(tmp, tmp3, tmp, tmp2); - dead_tmp(tmp3); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp3); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); } else if ((insn & 0x000003e0) == 0x00000060) { tmp = load_reg(s, rm); shift = (insn >> 10) & 3; - /* ??? In many cases it's not neccessary to do a + /* ??? In many cases it's not necessary to do a rotate, a shift is sufficient. */ if (shift != 0) tcg_gen_rotri_i32(tmp, tmp, shift * 8); @@ -6947,7 +7522,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) gen_add16(tmp, tmp2); } else { tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } } store_reg(s, rd, tmp); @@ -6992,7 +7567,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u); } tcg_gen_shri_i64(tmp64, tmp64, 32); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_trunc_i64_i32(tmp, tmp64); tcg_temp_free_i64(tmp64); store_reg(s, rn, tmp); @@ -7000,18 +7575,22 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (insn & (1 << 5)) gen_swap_half(tmp2); gen_smul_dual(tmp, tmp2); - /* This addition cannot overflow. */ if (insn & (1 << 6)) { + /* This subtraction cannot overflow. */ tcg_gen_sub_i32(tmp, tmp, tmp2); } else { - tcg_gen_add_i32(tmp, tmp, tmp2); + /* This addition cannot overflow 32 bits; + * however it may overflow considered as a signed + * operation, in which case we must set the Q flag. 
+ */ + gen_helper_add_setq(tmp, tmp, tmp2); } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (insn & (1 << 22)) { /* smlald, smlsld */ tmp64 = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(tmp64, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_addq(s, tmp64, rd, rn); gen_storeq_reg(s, rd, rn, tmp64); tcg_temp_free_i64(tmp64); @@ -7021,7 +7600,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) { tmp2 = load_reg(s, rd); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } store_reg(s, rn, tmp); } @@ -7035,11 +7614,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tmp = load_reg(s, rm); tmp2 = load_reg(s, rs); gen_helper_usad8(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (rd != 15) { tmp2 = load_reg(s, rd); tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } store_reg(s, rn, tmp); break; @@ -7050,7 +7629,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) i = (insn >> 16) & 0x1f; i = i + 1 - shift; if (rm == 15) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } else { tmp = load_reg(s, rm); @@ -7058,7 +7637,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (i != 32) { tmp2 = load_reg(s, rd); gen_bfi(tmp, tmp2, tmp, shift, (1u << i) - 1); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } store_reg(s, rd, tmp); break; @@ -7124,14 +7703,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } else if (insn & (1 << 21)) { store_reg(s, rn, tmp2); } else { - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } if (insn & (1 << 20)) { /* Complete the load. */ - if (rd == 15) - gen_bx(s, tmp); - else - store_reg(s, rd, tmp); + store_reg_from_load(env, s, rd, tmp); } break; case 0x08: @@ -7184,28 +7760,26 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (insn & (1 << 20)) { /* load */ tmp = gen_ld32(addr, IS_USER(s)); - if (i == 15) { - gen_bx(s, tmp); - } else if (user) { + if (user) { tmp2 = tcg_const_i32(i); gen_helper_set_user_reg(tmp2, tmp); tcg_temp_free_i32(tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else if (i == rn) { loaded_var = tmp; loaded_base = 1; } else { - store_reg(s, i, tmp); + store_reg_from_load(env, s, i, tmp); } } else { /* store */ if (i == 15) { /* special case: r15 = PC + 8 */ val = (long)s->pc + 4; - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, val); } else if (user) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tmp2 = tcg_const_i32(i); gen_helper_get_user_reg(tmp, tmp2); tcg_temp_free_i32(tmp2); @@ -7241,7 +7815,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } if (loaded_base) { store_reg(s, rn, loaded_var); @@ -7250,7 +7824,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) /* Restore CPSR from SPSR. 
*/ tmp = load_cpu_field(spsr); gen_set_cpsr(tmp, 0xffffffff); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); s->is_jmp = DISAS_UPDATE; } } @@ -7263,7 +7837,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) /* branch (and link) */ val = (int32_t)s->pc; if (insn & (1 << 24)) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, val); store_reg(s, 14, tmp); } @@ -7325,8 +7899,7 @@ gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCG logic_cc = conds; break; case 3: /* orn */ - tcg_gen_not_i32(t1, t1); - tcg_gen_or_i32(t0, t0, t1); + tcg_gen_orc_i32(t0, t0, t1); logic_cc = conds; break; case 4: /* eor */ @@ -7396,13 +7969,14 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) 16-bit instructions to get correct prefetch abort behavior. */ insn = insn_hw1; if ((insn & (1 << 12)) == 0) { + ARCH(5); /* Second half of blx. */ offset = ((insn & 0x7ff) << 1); tmp = load_reg(s, 14); tcg_gen_addi_i32(tmp, tmp, offset); tcg_gen_andi_i32(tmp, tmp, 0xfffffffc); - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, s->pc | 1); store_reg(s, 14, tmp2); gen_bx(s, tmp); @@ -7414,7 +7988,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tmp = load_reg(s, 14); tcg_gen_addi_i32(tmp, tmp, offset); - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, s->pc | 1); store_reg(s, 14, tmp2); gen_bx(s, tmp); @@ -7453,7 +8027,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (insn & 0x01200000) { /* Load/store doubleword. */ if (rn == 15) { - addr = new_tmp(); + addr = tcg_temp_new_i32(); tcg_gen_movi_i32(addr, s->pc & ~3); } else { addr = load_reg(s, rn); @@ -7487,7 +8061,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tcg_gen_addi_i32(addr, addr, offset - 4); store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } } else if ((insn & (1 << 23)) == 0) { /* Load/store exclusive word. */ @@ -7503,7 +8077,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) } else if ((insn & (1 << 6)) == 0) { /* Table Branch. 
*/ if (rn == 15) { - addr = new_tmp(); + addr = tcg_temp_new_i32(); tcg_gen_movi_i32(addr, s->pc); } else { addr = load_reg(s, rn); @@ -7513,13 +8087,13 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (insn & (1 << 4)) { /* tbh */ tcg_gen_add_i32(addr, addr, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = gen_ld16u(addr, IS_USER(s)); } else { /* tbb */ - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = gen_ld8u(addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); tcg_gen_shli_i32(tmp, tmp, 1); tcg_gen_addi_i32(tmp, tmp, s->pc); store_reg(s, 15, tmp); @@ -7563,13 +8137,13 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) } store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } gen_rfe(s, tmp, tmp2); } else { /* srs */ op = (insn & 0x1f); - addr = new_tmp(); + addr = tcg_temp_new_i32(); tmp = tcg_const_i32(op); gen_helper_get_r13_banked(addr, cpu_env, tmp); tcg_temp_free_i32(tmp); @@ -7579,7 +8153,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tmp = load_reg(s, 14); gen_st32(tmp, addr, 0); tcg_gen_addi_i32(addr, addr, 4); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); gen_helper_cpsr_read(tmp); gen_st32(tmp, addr, 0); if (insn & (1 << 21)) { @@ -7592,11 +8166,12 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_helper_set_r13_banked(cpu_env, tmp, addr); tcg_temp_free_i32(tmp); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } } } else { - int i; + int i, loaded_base = 0; + TCGv loaded_var; /* Load/store multiple. */ addr = load_reg(s, rn); offset = 0; @@ -7608,6 +8183,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tcg_gen_addi_i32(addr, addr, -offset); } + TCGV_UNUSED(loaded_var); for (i = 0; i < 16; i++) { if ((insn & (1 << i)) == 0) continue; @@ -7616,6 +8192,9 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tmp = gen_ld32(addr, IS_USER(s)); if (i == 15) { gen_bx(s, tmp); + } else if (i == rn) { + loaded_var = tmp; + loaded_base = 1; } else { store_reg(s, i, tmp); } @@ -7626,6 +8205,9 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) } tcg_gen_addi_i32(addr, addr, 4); } + if (loaded_base) { + store_reg(s, rn, loaded_var); + } if (insn & (1 << 21)) { /* Base register writeback. */ if (insn & (1 << 24)) { @@ -7636,7 +8218,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) goto illegal_op; store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } } } @@ -7664,12 +8246,12 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); } tcg_gen_or_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); } else { /* Data processing register constant shift. 
*/ if (rn == 15) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } else { tmp = load_reg(s, rn); @@ -7683,11 +8265,11 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_arm_shift_im(tmp2, shiftop, shift, logic_cc); if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2)) goto illegal_op; - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (rd != 15) { store_reg(s, rd, tmp); } else { - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } } break; @@ -7711,7 +8293,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) case 1: /* Sign/zero extend. */ tmp = load_reg(s, rm); shift = (insn >> 4) & 3; - /* ??? In many cases it's not neccessary to do a + /* ??? In many cases it's not necessary to do a rotate, a shift is sufficient. */ if (shift != 0) tcg_gen_rotri_i32(tmp, tmp, shift * 8); @@ -7731,7 +8313,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_add16(tmp, tmp2); } else { tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } } store_reg(s, rd, tmp); @@ -7744,7 +8326,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tmp = load_reg(s, rn); tmp2 = load_reg(s, rm); gen_thumb2_parallel_addsub(op, shift, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); break; case 3: /* Other data processing. */ @@ -7759,7 +8341,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_helper_sub_saturate(tmp, tmp2, tmp); else gen_helper_add_saturate(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } else { tmp = load_reg(s, rn); switch (op) { @@ -7777,11 +8359,11 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) break; case 0x10: /* sel */ tmp2 = load_reg(s, rm); - tmp3 = new_tmp(); + tmp3 = tcg_temp_new_i32(); tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE)); gen_helper_sel_flags(tmp, tmp3, tmp, tmp2); - dead_tmp(tmp3); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp3); + tcg_temp_free_i32(tmp2); break; case 0x18: /* clz */ gen_helper_clz(tmp, tmp); @@ -7799,23 +8381,23 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) switch ((insn >> 20) & 7) { case 0: /* 32 x 32 -> 32 */ tcg_gen_mul_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (rs != 15) { tmp2 = load_reg(s, rs); if (op) tcg_gen_sub_i32(tmp, tmp2, tmp); else tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; case 1: /* 16 x 16 -> 32 */ gen_mulxy(tmp, tmp2, op & 2, op & 1); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (rs != 15) { tmp2 = load_reg(s, rs); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; case 2: /* Dual multiply add. */ @@ -7823,18 +8405,22 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (op) gen_swap_half(tmp2); gen_smul_dual(tmp, tmp2); - /* This addition cannot overflow. */ if (insn & (1 << 22)) { + /* This subtraction cannot overflow. */ tcg_gen_sub_i32(tmp, tmp, tmp2); } else { - tcg_gen_add_i32(tmp, tmp, tmp2); + /* This addition cannot overflow 32 bits; + * however it may overflow considered as a signed + * operation, in which case we must set the Q flag. 
+ */ + gen_helper_add_setq(tmp, tmp, tmp2); } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (rs != 15) { tmp2 = load_reg(s, rs); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; case 3: /* 32 * 16 -> 32msb */ @@ -7844,14 +8430,14 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_sxth(tmp2); tmp64 = gen_muls_i64_i32(tmp, tmp2); tcg_gen_shri_i64(tmp64, tmp64, 16); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_trunc_i64_i32(tmp, tmp64); tcg_temp_free_i64(tmp64); if (rs != 15) { tmp2 = load_reg(s, rs); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */ @@ -7868,17 +8454,17 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u); } tcg_gen_shri_i64(tmp64, tmp64, 32); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_trunc_i64_i32(tmp, tmp64); tcg_temp_free_i64(tmp64); break; case 7: /* Unsigned sum of absolute differences. */ gen_helper_usad8(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (rs != 15) { tmp2 = load_reg(s, rs); tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; } @@ -7896,7 +8482,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_helper_udiv(tmp, tmp, tmp2); else gen_helper_sdiv(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); } else if ((op & 0xe) == 0xc) { /* Dual multiply accumulate long. */ @@ -7908,11 +8494,11 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) } else { tcg_gen_add_i32(tmp, tmp, tmp2); } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); /* BUGFIX */ tmp64 = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(tmp64, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_addq(s, tmp64, rs, rd); gen_storeq_reg(s, rs, rd, tmp64); tcg_temp_free_i64(tmp64); @@ -7924,10 +8510,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (op & 8) { /* smlalxy */ gen_mulxy(tmp, tmp2, op & 2, op & 1); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tmp64 = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(tmp64, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else { /* Signed 64-bit multiply */ tmp64 = gen_muls_i64_i32(tmp, tmp2); @@ -7951,7 +8537,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) /* Coprocessor. */ if (((insn >> 24) & 3) == 3) { /* Translate into the equivalent ARM encoding. */ - insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4); + insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28); if (disas_neon_data_insn(env, s, insn)) goto illegal_op; } else { @@ -7988,6 +8574,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) } else { /* blx */ offset &= ~(uint32_t)2; + /* thumb2 bx, no need to check */ gen_bx_im(s, offset); } } else if (((insn >> 23) & 7) == 7) { @@ -8007,7 +8594,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) addr = tcg_const_i32(insn & 0xff); gen_helper_v7m_msr(cpu_env, addr, tmp); tcg_temp_free_i32(addr); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_lookup_tb(s); break; } @@ -8081,7 +8668,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_exception_return(s, tmp); break; case 6: /* mrs cpsr. 
*/ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); if (IS_M(env)) { addr = tcg_const_i32(insn & 0xff); gen_helper_v7m_mrs(tmp, cpu_env, addr); @@ -8133,7 +8720,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) imm = insn & 0x1f; shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c); if (rn == 15) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } else { tmp = load_reg(s, rn); @@ -8160,7 +8747,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (imm != 32) { tmp2 = load_reg(s, rd); gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; case 7: @@ -8203,7 +8790,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tcg_gen_ori_i32(tmp, tmp, imm << 16); } else { /* movw */ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, imm); } } else { @@ -8214,7 +8801,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) offset -= imm; else offset += imm; - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, offset); } else { tmp = load_reg(s, rn); @@ -8253,11 +8840,11 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) shifter_out = 1; break; } - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, imm); rn = (insn >> 16) & 0xf; if (rn == 15) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } else { tmp = load_reg(s, rn); @@ -8266,12 +8853,12 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0, shifter_out, tmp, tmp2)) goto illegal_op; - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); rd = (insn >> 8) & 0xf; if (rd != 15) { store_reg(s, rd, tmp); } else { - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } } } @@ -8286,9 +8873,45 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) goto illegal_op; break; } + op = ((insn >> 21) & 3) | ((insn >> 22) & 4); + if (rs == 15) { + if (!(insn & (1 << 20))) { + goto illegal_op; + } + if (op != 2) { + /* Byte or halfword load space with dest == r15 : memory hints. + * Catch them early so we don't emit pointless addressing code. + * This space is a mix of: + * PLD/PLDW/PLI, which we implement as NOPs (note that unlike + * the ARM encodings, PLDW space doesn't UNDEF for non-v7MP + * cores) + * unallocated hints, which must be treated as NOPs + * UNPREDICTABLE space, which we NOP or UNDEF depending on + * which is easiest for the decoding logic + * Some space which must UNDEF + */ + int op1 = (insn >> 23) & 3; + int op2 = (insn >> 6) & 0x3f; + if (op & 2) { + goto illegal_op; + } + if (rn == 15) { + /* UNPREDICTABLE or unallocated hint */ + return 0; + } + if (op1 & 1) { + return 0; /* PLD* or unallocated hint */ + } + if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) { + return 0; /* PLD* or unallocated hint */ + } + /* UNDEF space, or an UNPREDICTABLE */ + return 1; + } + } user = IS_USER(s); if (rn == 15) { - addr = new_tmp(); + addr = tcg_temp_new_i32(); /* PC relative. */ /* s->pc has already been incremented by 4. */ imm = s->pc & 0xfffffffc; @@ -8304,77 +8927,74 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) imm = insn & 0xfff; tcg_gen_addi_i32(addr, addr, imm); } else { - op = (insn >> 8) & 7; imm = insn & 0xff; - switch (op) { - case 0: case 8: /* Shifted Register. 
*/ + switch ((insn >> 8) & 0xf) { + case 0x0: /* Shifted Register. */ shift = (insn >> 4) & 0xf; - if (shift > 3) + if (shift > 3) { + tcg_temp_free_i32(addr); goto illegal_op; + } tmp = load_reg(s, rm); if (shift) tcg_gen_shli_i32(tmp, tmp, shift); tcg_gen_add_i32(addr, addr, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; - case 4: /* Negative offset. */ + case 0xc: /* Negative offset. */ tcg_gen_addi_i32(addr, addr, -imm); break; - case 6: /* User privilege. */ + case 0xe: /* User privilege. */ tcg_gen_addi_i32(addr, addr, imm); user = 1; break; - case 1: /* Post-decrement. */ + case 0x9: /* Post-decrement. */ imm = -imm; /* Fall through. */ - case 3: /* Post-increment. */ + case 0xb: /* Post-increment. */ postinc = 1; writeback = 1; break; - case 5: /* Pre-decrement. */ + case 0xd: /* Pre-decrement. */ imm = -imm; /* Fall through. */ - case 7: /* Pre-increment. */ + case 0xf: /* Pre-increment. */ tcg_gen_addi_i32(addr, addr, imm); writeback = 1; break; default: + tcg_temp_free_i32(addr); goto illegal_op; } } } - op = ((insn >> 21) & 3) | ((insn >> 22) & 4); if (insn & (1 << 20)) { /* Load. */ - if (rs == 15 && op != 2) { - if (op & 2) - goto illegal_op; - /* Memory hint. Implemented as NOP. */ + switch (op) { + case 0: tmp = gen_ld8u(addr, user); break; + case 4: tmp = gen_ld8s(addr, user); break; + case 1: tmp = gen_ld16u(addr, user); break; + case 5: tmp = gen_ld16s(addr, user); break; + case 2: tmp = gen_ld32(addr, user); break; + default: + tcg_temp_free_i32(addr); + goto illegal_op; + } + if (rs == 15) { + gen_bx(s, tmp); } else { - switch (op) { - case 0: tmp = gen_ld8u(addr, user); break; - case 4: tmp = gen_ld8s(addr, user); break; - case 1: tmp = gen_ld16u(addr, user); break; - case 5: tmp = gen_ld16s(addr, user); break; - case 2: tmp = gen_ld32(addr, user); break; - default: goto illegal_op; - } - if (rs == 15) { - gen_bx(s, tmp); - } else { - store_reg(s, rs, tmp); - } + store_reg(s, rs, tmp); } } else { /* Store. 
*/ - if (rs == 15) - goto illegal_op; tmp = load_reg(s, rs); switch (op) { case 0: gen_st8(tmp, addr, user); break; case 1: gen_st16(tmp, addr, user); break; case 2: gen_st32(tmp, addr, user); break; - default: goto illegal_op; + default: + tcg_temp_free_i32(addr); + goto illegal_op; } } if (postinc) @@ -8382,7 +9002,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (writeback) { store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } } break; @@ -8426,7 +9046,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, rn); if (insn & (1 << 10)) { /* immediate */ - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, (insn >> 6) & 7); } else { /* reg */ @@ -8444,7 +9064,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) else gen_helper_add_cc(tmp, tmp, tmp2); } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); } else { /* shift immediate */ @@ -8462,27 +9082,27 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) op = (insn >> 11) & 3; rd = (insn >> 8) & 0x7; if (op == 0) { /* mov */ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, insn & 0xff); if (!s->condexec_mask) gen_logic_CC(tmp); store_reg(s, rd, tmp); } else { tmp = load_reg(s, rd); - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, insn & 0xff); switch (op) { case 1: /* cmp */ gen_helper_sub_cc(tmp, tmp, tmp2); - dead_tmp(tmp); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); break; case 2: /* add */ if (s->condexec_mask) tcg_gen_add_i32(tmp, tmp, tmp2); else gen_helper_add_cc(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); break; case 3: /* sub */ @@ -8490,7 +9110,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tcg_gen_sub_i32(tmp, tmp, tmp2); else gen_helper_sub_cc(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); break; } @@ -8502,10 +9122,10 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) /* load pc-relative. Bit 1 of PC is ignored. 
*/ val = s->pc + 2 + ((insn & 0xff) * 4); val &= ~(uint32_t)2; - addr = new_tmp(); + addr = tcg_temp_new_i32(); tcg_gen_movi_i32(addr, val); tmp = gen_ld32(addr, IS_USER(s)); - dead_tmp(addr); + tcg_temp_free_i32(addr); store_reg(s, rd, tmp); break; } @@ -8519,15 +9139,15 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, rd); tmp2 = load_reg(s, rm); tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); break; case 1: /* cmp */ tmp = load_reg(s, rd); tmp2 = load_reg(s, rm); gen_helper_sub_cc(tmp, tmp, tmp2); - dead_tmp(tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); break; case 2: /* mov/cpy */ tmp = load_reg(s, rm); @@ -8536,11 +9156,13 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) case 3:/* branch [and link] exchange thumb register */ tmp = load_reg(s, rm); if (insn & (1 << 7)) { + ARCH(5); val = (uint32_t)s->pc | 1; - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, val); store_reg(s, 14, tmp2); } + /* already thumb, no need to check */ gen_bx(s, tmp); break; } @@ -8562,7 +9184,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) } if (op == 9) { /* neg */ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } else if (op != 0xf) { /* mvn doesn't read its first operand */ tmp = load_reg(s, rd); @@ -8673,14 +9295,14 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) if (val) { store_reg(s, rm, tmp2); if (op != 0xf) - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else { store_reg(s, rd, tmp); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } } else { - dead_tmp(tmp); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); } break; @@ -8693,7 +9315,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) addr = load_reg(s, rn); tmp = load_reg(s, rm); tcg_gen_add_i32(addr, addr, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); if (op < 3) /* store */ tmp = load_reg(s, rd); @@ -8726,7 +9348,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) } if (op >= 3) /* load */ store_reg(s, rd, tmp); - dead_tmp(addr); + tcg_temp_free_i32(addr); break; case 6: @@ -8746,7 +9368,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, rd); gen_st32(tmp, addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); break; case 7: @@ -8766,7 +9388,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, rd); gen_st8(tmp, addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); break; case 8: @@ -8786,7 +9408,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, rd); gen_st16(tmp, addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); break; case 9: @@ -8805,7 +9427,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, rd); gen_st32(tmp, addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); break; case 10: @@ -8816,7 +9438,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, 13); } else { /* PC. bit 1 is ignored. 
*/ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2); } val = (insn & 0xff) * 4; @@ -8900,8 +9522,9 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) /* write back the new stack pointer */ store_reg(s, 13, addr); /* set the new PC value */ - if ((insn & 0x0900) == 0x0900) - gen_bx(s, tmp); + if ((insn & 0x0900) == 0x0900) { + store_reg_from_load(env, s, 15, tmp); + } break; case 1: case 3: case 9: case 11: /* czb */ @@ -8913,7 +9536,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel); else tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3; val = (uint32_t)s->pc + 2; val += offset; @@ -8932,6 +9555,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) break; case 0xe: /* bkpt */ + ARCH(5); gen_exception_insn(s, 2, EXCP_BKPT); break; @@ -8984,7 +9608,10 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) break; case 12: + { /* load/store multiple */ + TCGv loaded_var; + TCGV_UNUSED(loaded_var); rn = (insn >> 8) & 0x7; addr = load_reg(s, rn); for (i = 0; i < 8; i++) { @@ -8992,7 +9619,11 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) if (insn & (1 << 11)) { /* load */ tmp = gen_ld32(addr, IS_USER(s)); - store_reg(s, i, tmp); + if (i == rn) { + loaded_var = tmp; + } else { + store_reg(s, i, tmp); + } } else { /* store */ tmp = load_reg(s, i); @@ -9002,14 +9633,18 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tcg_gen_addi_i32(addr, addr, 4); } } - /* Base register writeback. */ if ((insn & (1 << rn)) == 0) { + /* base reg not in list: base register writeback */ store_reg(s, rn, addr); } else { - dead_tmp(addr); + /* base reg in list: if load, complete it now */ + if (insn & (1 << 11)) { + store_reg(s, rn, loaded_var); + } + tcg_temp_free_i32(addr); } break; - + } case 13: /* conditional branch or swi */ cond = (insn >> 8) & 0xf; @@ -9078,8 +9713,6 @@ static inline void gen_intermediate_code_internal(CPUState *env, int max_insns; /* generate intermediate code */ - num_temps = 0; - pc_start = tb->pc; dc->tb = tb; @@ -9116,6 +9749,8 @@ static inline void gen_intermediate_code_internal(CPUState *env, gen_icount_start(); + tcg_clear_temp_count(); + /* A note on handling of the condexec (IT) bits: * * We want to avoid the overhead of having to write the updated condexec @@ -9138,8 +9773,8 @@ static inline void gen_intermediate_code_internal(CPUState *env, * This is handled in the same way as restoration of the * PC in these situations: we will be called again with search_pc=1 * and generate a mapping of the condexec bits for each PC in - * gen_opc_condexec_bits[]. gen_pc_load[] then uses this to restore - * the condexec bits. + * gen_opc_condexec_bits[]. restore_state_to_opc() then uses + * this to restore the condexec bits. * * Note that there are no instructions which can read the condexec * bits, and none which can write non-static values to them, so @@ -9151,7 +9786,7 @@ static inline void gen_intermediate_code_internal(CPUState *env, complications trying to do it at the end of the block. 
*/ if (dc->condexec_mask || dc->condexec_cond) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); store_cpu_field(tmp, condexec_bits); } @@ -9220,15 +9855,16 @@ static inline void gen_intermediate_code_internal(CPUState *env, } else { disas_arm_insn(env, dc); } - if (num_temps) { - fprintf(stderr, "Internal resource leak before %08x\n", dc->pc); - num_temps = 0; - } if (dc->condjmp && !dc->is_jmp) { gen_set_label(dc->condlabel); dc->condjmp = 0; } + + if (tcg_check_temp_count()) { + fprintf(stderr, "TCG temporary leak before %08x\n", dc->pc); + } + /* Translation stops when a conditional branch is encountered. * Otherwise the subsequent code could get translated several times. * Also stop translation when a page boundary is reached. This @@ -9403,8 +10039,7 @@ void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, #endif } -void gen_pc_load(CPUState *env, TranslationBlock *tb, - unsigned long searched_pc, int pc_pos, void *puc) +void restore_state_to_opc(CPUState *env, TranslationBlock *tb, int pc_pos) { env->regs[15] = gen_opc_pc[pc_pos]; env->condexec_bits = gen_opc_condexec_bits[pc_pos];
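
Notes on the hunks above.

The ARCH() guards added through disas_arm_insn() and disas_thumb_insn() (ARCH(4T), ARCH(5), ARCH(5TE), ARCH(6), ARCH(7)) gate each instruction on the emulated core's feature bits. The macro itself is defined near the top of the file, outside these hunks; assuming it is built on the ENABLE_ARCH_* feature tests, a minimal sketch of what each guard expands to:

    /* Sketch only: UNDEF the instruction unless the core has the feature. */
    #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while (0)

So, for example, the new ARCH(5TE) ahead of the saturating add/subtract case sends pre-v5TE cores down the illegal_op path instead of letting them execute the instruction.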
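Several hunks (the single data transfer load path, the ARM load-multiple loop, and the Thumb pop-with-PC case) replace the open-coded "destination is r15, so gen_bx(); otherwise store_reg()" sequence with a call to store_reg_from_load(). The helper's definition lies outside the region shown; judging from its call sites, it stores to a register but makes a load to r15 behave as an interworking branch where the architecture requires it. A plausible shape, with the architecture predicate an assumption rather than taken from the patch:

    /* Sketch only: the ENABLE_ARCH_7 predicate is assumed. */
    static inline void store_reg_from_load(CPUState *env, DisasContext *s,
                                           int reg, TCGv var)
    {
        if (reg == 15 && ENABLE_ARCH_7) {
            gen_bx(s, var);           /* load to PC interworks */
        } else {
            store_reg(s, reg, var);   /* plain store; r15 still ends the TB */
        }
    }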
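Finally, the mechanical change that dominates the diff: every new_tmp()/dead_tmp() pair becomes tcg_temp_new_i32()/tcg_temp_free_i32(), and the translator's private leak counter (see the removed "Internal resource leak" check) gives way to TCG's own accounting, tcg_clear_temp_count() before the first instruction and tcg_check_temp_count() after each one, as the gen_intermediate_code_internal() hunk shows. A minimal sketch of the resulting per-helper pattern, where gen_example() is a made-up name and the tcg_* calls are the real ones used throughout:

    static void gen_example(TCGv var)
    {
        TCGv tmp = tcg_temp_new_i32();   /* allocate an i32 temp directly */
        tcg_gen_movi_i32(tmp, 0xff);
        tcg_gen_and_i32(var, var, tmp);
        tcg_temp_free_i32(tmp);          /* omitting this now trips
                                            tcg_check_temp_count() */
    }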