*/
#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/exec-all.h"
-#include "tcg/tcg-op.h"
-#include "tcg/tcg-op-gvec.h"
-#include "tcg/tcg-gvec-desc.h"
-#include "qemu/log.h"
-#include "arm_ldst.h"
#include "translate.h"
-#include "internals.h"
-#include "exec/helper-proto.h"
-#include "exec/helper-gen.h"
-#include "exec/log.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"
tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
vec_full_reg_offset(s, rn),
status, vsz, vsz, data, fn);
- tcg_temp_free_ptr(status);
}
return true;
}
vec_full_reg_offset(s, rn),
vec_full_reg_offset(s, rm),
status, vsz, vsz, data, fn);
-
- tcg_temp_free_ptr(status);
}
return true;
}
{
TCGv_ptr status = fpstatus_ptr(flavour);
bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
- tcg_temp_free_ptr(status);
return ret;
}
vec_full_reg_offset(s, ra),
pred_full_reg_offset(s, pg),
status, vsz, vsz, data, fn);
-
- tcg_temp_free_ptr(status);
}
return true;
}
vec_full_reg_offset(s, rn),
pred_full_reg_offset(s, pg),
status, vsz, vsz, data, fn);
- tcg_temp_free_ptr(status);
}
return true;
}
vec_full_reg_offset(s, rm),
pred_full_reg_offset(s, pg),
status, vsz, vsz, data, fn);
- tcg_temp_free_ptr(status);
}
return true;
}
gen_helper_sve_predtest1(t, d, g);
do_pred_flags(t);
- tcg_temp_free_i32(t);
}
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
tcg_gen_addi_ptr(gptr, cpu_env, gofs);
gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
- tcg_temp_free_ptr(dptr);
- tcg_temp_free_ptr(gptr);
do_pred_flags(t);
- tcg_temp_free_i32(t);
}
/* For each element size, the bits within a predicate word that are active. */
tcg_gen_andi_i64(d, d, mask);
tcg_gen_andi_i64(t, t, ~mask);
tcg_gen_or_i64(d, d, t);
- tcg_temp_free_i64(t);
}
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
tcg_gen_andi_i64(d, d, mask);
tcg_gen_andi_i64(t, t, ~mask);
tcg_gen_or_i64(d, d, t);
- tcg_temp_free_i64(t);
}
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
fn(temp, t_zn, t_pg, desc);
- tcg_temp_free_ptr(t_zn);
- tcg_temp_free_ptr(t_pg);
write_fp_dreg(s, a->rd, temp);
- tcg_temp_free_i64(temp);
return true;
}
tcg_gen_extrl_i64_i32(s32, start);
tcg_gen_extrl_i64_i32(i32, incr);
fns[esz](t_zd, s32, i32, desc);
-
- tcg_temp_free_i32(s32);
- tcg_temp_free_i32(i32);
}
- tcg_temp_free_ptr(t_zd);
return true;
}
tcg_gen_st_i64(pd, cpu_env, dofs);
do_predtest1(pd, pg);
-
- tcg_temp_free_i64(pd);
- tcg_temp_free_i64(pn);
- tcg_temp_free_i64(pm);
- tcg_temp_free_i64(pg);
} else {
/* The operation and flags generation is large. The computation
* of the flags depends on the original contents of the guarding
tcg_gen_ld_i64(pn, cpu_env, nofs);
tcg_gen_ld_i64(pg, cpu_env, gofs);
do_predtest1(pn, pg);
-
- tcg_temp_free_i64(pn);
- tcg_temp_free_i64(pg);
} else {
do_predtest(s, nofs, gofs, words);
}
}
done:
- tcg_temp_free_i64(t);
-
/* PTRUES */
if (setflag) {
tcg_gen_movi_i32(cpu_NF, -(word != 0));
t = tcg_temp_new_i32();
gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
- tcg_temp_free_ptr(t_pd);
- tcg_temp_free_ptr(t_pg);
do_pred_flags(t);
- tcg_temp_free_i32(t);
return true;
}
t2 = tcg_constant_i64(0);
tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
}
- tcg_temp_free_i64(t1);
}
- tcg_temp_free_i64(t0);
}
/* Similarly with a vector and a scalar operand. */
} else {
gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
}
- tcg_temp_free_i32(t32);
break;
case MO_16:
} else {
gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
}
- tcg_temp_free_i32(t32);
break;
case MO_32:
} else {
gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
}
- tcg_temp_free_i64(t64);
break;
case MO_64:
t64 = tcg_temp_new_i64();
tcg_gen_neg_i64(t64, val);
gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
- tcg_temp_free_i64(t64);
} else {
gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
}
default:
g_assert_not_reached();
}
-
- tcg_temp_free_ptr(dptr);
- tcg_temp_free_ptr(nptr);
}
static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
fns[esz](t_zd, t_zn, t_pg, val, desc);
-
- tcg_temp_free_ptr(t_zd);
- tcg_temp_free_ptr(t_zn);
- tcg_temp_free_ptr(t_pg);
}
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
fns[a->esz](t_zd, t_zn, val, desc);
-
- tcg_temp_free_ptr(t_zd);
- tcg_temp_free_ptr(t_zn);
}
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
do_insr_i64(s, a, t);
- tcg_temp_free_i64(t);
}
return true;
}
tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
fn(t_d, t_n, t_m, tcg_constant_i32(desc));
-
- tcg_temp_free_ptr(t_d);
- tcg_temp_free_ptr(t_n);
- tcg_temp_free_ptr(t_m);
return true;
}
desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
fn(t_d, t_n, tcg_constant_i32(desc));
-
- tcg_temp_free_ptr(t_d);
- tcg_temp_free_ptr(t_n);
return true;
}
tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
-
- tcg_temp_free_ptr(t_p);
}
/* Increment LAST to the offset of the next element in the vector,
int rm, int esz)
{
TCGv_ptr p = tcg_temp_new_ptr();
- TCGv_i64 r;
/* Convert offset into vector into offset into ENV.
* The final adjustment for the vector register base
tcg_gen_ext_i32_ptr(p, last);
tcg_gen_add_ptr(p, p, cpu_env);
- r = load_esz(p, vec_full_reg_offset(s, rm), esz);
- tcg_temp_free_ptr(p);
-
- return r;
+ return load_esz(p, vec_full_reg_offset(s, rm), esz);
}
/* Compute CLAST for a Zreg. */
return true;
}
- last = tcg_temp_local_new_i32();
+ last = tcg_temp_new_i32();
over = gen_new_label();
find_last_active(s, last, esz, a->pg);
}
ele = load_last_active(s, last, a->rm, esz);
- tcg_temp_free_i32(last);
vsz = vec_full_reg_size(s);
tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
- tcg_temp_free_i64(ele);
/* If this insn used MOVPRFX, we may need a second move. */
if (a->rd != a->rn) {
* a conditional move.
*/
ele = load_last_active(s, last, rm, esz);
- tcg_temp_free_i32(last);
tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
ele, reg_val);
-
- tcg_temp_free_i64(cmp);
- tcg_temp_free_i64(ele);
}
/* Compute CLAST for a Vreg. */
do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
write_fp_dreg(s, a->rd, reg);
- tcg_temp_free_i64(reg);
}
return true;
}
int pg, int rm, bool before)
{
TCGv_i32 last = tcg_temp_new_i32();
- TCGv_i64 ret;
find_last_active(s, last, esz, pg);
if (before) {
incr_last_active(s, last, esz);
}
- ret = load_last_active(s, last, rm, esz);
- tcg_temp_free_i32(last);
- return ret;
+ return load_last_active(s, last, rm, esz);
}
/* Compute LAST for a Vreg. */
if (sve_access_check(s)) {
TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
write_fp_dreg(s, a->rd, val);
- tcg_temp_free_i64(val);
}
return true;
}
if (sve_access_check(s)) {
TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
- tcg_temp_free_i64(val);
}
return true;
}
int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
- tcg_temp_free_i64(t);
}
return true;
}
gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
- tcg_temp_free_ptr(pd);
- tcg_temp_free_ptr(zn);
- tcg_temp_free_ptr(zm);
- tcg_temp_free_ptr(pg);
-
do_pred_flags(t);
-
- tcg_temp_free_i32(t);
return true;
}
gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
- tcg_temp_free_ptr(pd);
- tcg_temp_free_ptr(zn);
- tcg_temp_free_ptr(pg);
-
do_pred_flags(t);
-
- tcg_temp_free_i32(t);
return true;
}
TCGv_i32 t = tcg_temp_new_i32();
fn_s(t, d, n, m, g, desc);
do_pred_flags(t);
- tcg_temp_free_i32(t);
} else {
fn(d, n, m, g, desc);
}
- tcg_temp_free_ptr(d);
- tcg_temp_free_ptr(n);
- tcg_temp_free_ptr(m);
- tcg_temp_free_ptr(g);
return true;
}
TCGv_i32 t = tcg_temp_new_i32();
fn_s(t, d, n, g, desc);
do_pred_flags(t);
- tcg_temp_free_i32(t);
} else {
fn(d, n, g, desc);
}
- tcg_temp_free_ptr(d);
- tcg_temp_free_ptr(n);
- tcg_temp_free_ptr(g);
return true;
}
TCGv_i64 g = tcg_temp_new_i64();
tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
tcg_gen_and_i64(val, val, g);
- tcg_temp_free_i64(g);
}
/* Reduce the pred_esz_masks value simply to reduce the
tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
- tcg_temp_free_ptr(t_pn);
- tcg_temp_free_ptr(t_pg);
}
}
} else {
tcg_gen_add_i64(reg, reg, val);
}
- tcg_temp_free_i64(val);
}
return true;
}
tcg_gen_setcond_i64(cond, cmp, rn, rm);
tcg_gen_extrl_i64_i32(cpu_NF, cmp);
- tcg_temp_free_i64(cmp);
/* VF = !NF & !CF. */
tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
/* Set the count to zero if the condition is false. */
tcg_gen_movi_i64(t1, 0);
tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
- tcg_temp_free_i64(t1);
/* Since we're bounded, pass as a 32-bit type. */
t2 = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(t2, t0);
- tcg_temp_free_i64(t0);
/* Scale elements to bits. */
tcg_gen_shli_i32(t2, t2, a->esz);
gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
}
do_pred_flags(t2);
-
- tcg_temp_free_ptr(ptr);
- tcg_temp_free_i32(t2);
return true;
}
tcg_gen_sub_i64(diff, op0, op1);
tcg_gen_sub_i64(t1, op1, op0);
tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
- tcg_temp_free_i64(t1);
/* Round down to a multiple of ESIZE. */
tcg_gen_andi_i64(diff, diff, -1 << a->esz);
/* If op1 == op0, diff == 0, and the condition is always true. */
/* Since we're bounded, pass as a 32-bit type. */
t2 = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(t2, diff);
- tcg_temp_free_i64(diff);
desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
do_pred_flags(t2);
-
- tcg_temp_free_ptr(ptr);
- tcg_temp_free_i32(t2);
return true;
}
status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
fn(temp, t_zn, t_pg, status, t_desc);
- tcg_temp_free_ptr(t_zn);
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_ptr(status);
write_fp_dreg(s, a->rd, temp);
- tcg_temp_free_i64(temp);
return true;
}
vec_full_reg_offset(s, a->rn),
pred_full_reg_offset(s, a->pg),
status, vsz, vsz, 0, fn);
- tcg_temp_free_ptr(status);
}
return true;
}
fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
- tcg_temp_free_ptr(t_fpst);
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_ptr(t_rm);
-
write_fp_dreg(s, a->rd, t_val);
- tcg_temp_free_i64(t_val);
return true;
}
status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
fn(t_zd, t_zn, t_pg, scalar, status, desc);
-
- tcg_temp_free_ptr(status);
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_ptr(t_zn);
- tcg_temp_free_ptr(t_zd);
}
static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
vec_full_reg_offset(s, a->rm),
pred_full_reg_offset(s, a->pg),
status, vsz, vsz, 0, fn);
- tcg_temp_free_ptr(status);
}
return true;
}
a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
- int mode, gen_helper_gvec_3_ptr *fn)
+ ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
{
unsigned vsz;
TCGv_i32 tmode;
}
vsz = vec_full_reg_size(s);
- tmode = tcg_const_i32(mode);
status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
-
- gen_helper_set_rmode(tmode, tmode, status);
+ tmode = gen_set_rmode(mode, status);
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
vec_full_reg_offset(s, a->rn),
pred_full_reg_offset(s, a->pg),
status, vsz, vsz, 0, fn);
- gen_helper_set_rmode(tmode, tmode, status);
- tcg_temp_free_i32(tmode);
- tcg_temp_free_ptr(status);
+ gen_restore_rmode(tmode, status);
return true;
}
TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
- float_round_nearest_even, frint_fns[a->esz])
+ FPROUNDING_TIEEVEN, frint_fns[a->esz])
TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
- float_round_up, frint_fns[a->esz])
+ FPROUNDING_POSINF, frint_fns[a->esz])
TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
- float_round_down, frint_fns[a->esz])
+ FPROUNDING_NEGINF, frint_fns[a->esz])
TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
- float_round_to_zero, frint_fns[a->esz])
+ FPROUNDING_ZERO, frint_fns[a->esz])
TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
- float_round_ties_away, frint_fns[a->esz])
+ FPROUNDING_TIEAWAY, frint_fns[a->esz])
static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
NULL, gen_helper_sve_frecpx_h,
void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
int len, int rn, int imm)
{
- int len_align = QEMU_ALIGN_DOWN(len, 8);
- int len_remain = len % 8;
- int nparts = len / 8 + ctpop8(len_remain);
+ int len_align = QEMU_ALIGN_DOWN(len, 16);
+ int len_remain = len % 16;
+ int nparts = len / 16 + ctpop8(len_remain);
int midx = get_mem_index(s);
TCGv_i64 dirty_addr, clean_addr, t0, t1;
+ TCGv_i128 t16;
dirty_addr = tcg_temp_new_i64();
tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
- clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
- tcg_temp_free_i64(dirty_addr);
+ clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
/*
* Note that unpredicated load/store of vector/predicate registers
int i;
t0 = tcg_temp_new_i64();
- for (i = 0; i < len_align; i += 8) {
- tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
+ t1 = tcg_temp_new_i64();
+ t16 = tcg_temp_new_i128();
+
+ for (i = 0; i < len_align; i += 16) {
+ tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
+ MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_extr_i128_i64(t0, t1, t16);
tcg_gen_st_i64(t0, base, vofs + i);
- tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ tcg_gen_st_i64(t1, base, vofs + i + 8);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
}
- tcg_temp_free_i64(t0);
} else {
TCGLabel *loop = gen_new_label();
- TCGv_ptr tp, i = tcg_const_local_ptr(0);
-
- /* Copy the clean address into a local temp, live across the loop. */
- t0 = clean_addr;
- clean_addr = new_tmp_a64_local(s);
- tcg_gen_mov_i64(clean_addr, t0);
-
- if (base != cpu_env) {
- TCGv_ptr b = tcg_temp_local_new_ptr();
- tcg_gen_mov_ptr(b, base);
- base = b;
- }
+ TCGv_ptr tp, i = tcg_temp_new_ptr();
+ tcg_gen_movi_ptr(i, 0);
gen_set_label(loop);
- t0 = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
- tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ t16 = tcg_temp_new_i128();
+ tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
+ MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
tp = tcg_temp_new_ptr();
tcg_gen_add_ptr(tp, base, i);
- tcg_gen_addi_ptr(i, i, 8);
+ tcg_gen_addi_ptr(i, i, 16);
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_extr_i128_i64(t0, t1, t16);
+
tcg_gen_st_i64(t0, tp, vofs);
- tcg_temp_free_ptr(tp);
- tcg_temp_free_i64(t0);
+ tcg_gen_st_i64(t1, tp, vofs + 8);
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
- tcg_temp_free_ptr(i);
-
- if (base != cpu_env) {
- tcg_temp_free_ptr(base);
- assert(len_remain == 0);
- }
}
/*
* Predicate register loads can be any multiple of 2.
* Note that we still store the entire 64-bit unit into cpu_env.
*/
+ if (len_remain >= 8) {
+ t0 = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
+ tcg_gen_st_i64(t0, base, vofs + len_align);
+ len_remain -= 8;
+ len_align += 8;
+ if (len_remain) {
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ }
+ }
if (len_remain) {
t0 = tcg_temp_new_i64();
switch (len_remain) {
case 4:
case 8:
tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
- MO_LE | ctz32(len_remain));
+ MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
break;
case 6:
t1 = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
tcg_gen_addi_i64(clean_addr, clean_addr, 4);
- tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
+ tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
- tcg_temp_free_i64(t1);
break;
default:
g_assert_not_reached();
}
tcg_gen_st_i64(t0, base, vofs + len_align);
- tcg_temp_free_i64(t0);
}
}
void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
int len, int rn, int imm)
{
- int len_align = QEMU_ALIGN_DOWN(len, 8);
- int len_remain = len % 8;
- int nparts = len / 8 + ctpop8(len_remain);
+ int len_align = QEMU_ALIGN_DOWN(len, 16);
+ int len_remain = len % 16;
+ int nparts = len / 16 + ctpop8(len_remain);
int midx = get_mem_index(s);
- TCGv_i64 dirty_addr, clean_addr, t0;
+ TCGv_i64 dirty_addr, clean_addr, t0, t1;
+ TCGv_i128 t16;
dirty_addr = tcg_temp_new_i64();
tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
- clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
- tcg_temp_free_i64(dirty_addr);
+ clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
/* Note that unpredicated load/store of vector/predicate registers
* are defined as a stream of bytes, which equates to little-endian
int i;
t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ t16 = tcg_temp_new_i128();
for (i = 0; i < len_align; i += 8) {
tcg_gen_ld_i64(t0, base, vofs + i);
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
- tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ tcg_gen_ld_i64(t1, base, vofs + i + 8);
+ tcg_gen_concat_i64_i128(t16, t0, t1);
+ tcg_gen_qemu_st_i128(t16, clean_addr, midx,
+ MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
}
- tcg_temp_free_i64(t0);
} else {
TCGLabel *loop = gen_new_label();
- TCGv_ptr tp, i = tcg_const_local_ptr(0);
-
- /* Copy the clean address into a local temp, live across the loop. */
- t0 = clean_addr;
- clean_addr = new_tmp_a64_local(s);
- tcg_gen_mov_i64(clean_addr, t0);
-
- if (base != cpu_env) {
- TCGv_ptr b = tcg_temp_local_new_ptr();
- tcg_gen_mov_ptr(b, base);
- base = b;
- }
+ TCGv_ptr tp, i = tcg_temp_new_ptr();
+ tcg_gen_movi_ptr(i, 0);
gen_set_label(loop);
t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
tp = tcg_temp_new_ptr();
tcg_gen_add_ptr(tp, base, i);
tcg_gen_ld_i64(t0, tp, vofs);
- tcg_gen_addi_ptr(i, i, 8);
- tcg_temp_free_ptr(tp);
+ tcg_gen_ld_i64(t1, tp, vofs + 8);
+ tcg_gen_addi_ptr(i, i, 16);
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
- tcg_gen_addi_i64(clean_addr, clean_addr, 8);
- tcg_temp_free_i64(t0);
+ t16 = tcg_temp_new_i128();
+ tcg_gen_concat_i64_i128(t16, t0, t1);
- tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
- tcg_temp_free_ptr(i);
+ /*
+ * t16 is a 128-bit value, so the MemOp size must be MO_128; MO_LEUQ
+ * describes a 64-bit access.  Use the same MemOp as the other i128
+ * loads/stores in gen_sve_ldr/gen_sve_str.
+ */
+ tcg_gen_qemu_st_i128(t16, clean_addr, midx,
+ MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
- if (base != cpu_env) {
- tcg_temp_free_ptr(base);
- assert(len_remain == 0);
- }
+ tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
}
/* Predicate register stores can be any multiple of 2. */
+ if (len_remain >= 8) {
+ t0 = tcg_temp_new_i64();
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
+ len_remain -= 8;
+ len_align += 8;
+ if (len_remain) {
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ }
+ }
if (len_remain) {
t0 = tcg_temp_new_i64();
tcg_gen_ld_i64(t0, base, vofs + len_align);
case 4:
case 8:
tcg_gen_qemu_st_i64(t0, clean_addr, midx,
- MO_LE | ctz32(len_remain));
+ MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
break;
case 6:
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
tcg_gen_addi_i64(clean_addr, clean_addr, 4);
tcg_gen_shri_i64(t0, t0, 32);
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
break;
default:
g_assert_not_reached();
}
- tcg_temp_free_i64(t0);
}
}
tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));
-
- tcg_temp_free_ptr(t_pg);
}
/* Indexed by [mte][be][dtype][nreg] */
return false;
}
if (sve_access_check(s)) {
- TCGv_i64 addr = new_tmp_a64(s);
+ TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
if (sve_access_check(s)) {
int vsz = vec_full_reg_size(s);
int elements = vsz >> dtype_esz[a->dtype];
- TCGv_i64 addr = new_tmp_a64(s);
+ TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
(a->imm * elements * (a->nreg + 1))
}
s->is_nonstreaming = true;
if (sve_access_check(s)) {
- TCGv_i64 addr = new_tmp_a64(s);
+ TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
int vsz = vec_full_reg_size(s);
int elements = vsz >> dtype_esz[a->dtype];
int off = (a->imm * elements) << dtype_msz(a->dtype);
- TCGv_i64 addr = new_tmp_a64(s);
+ TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
poff = offsetof(CPUARMState, vfp.preg_tmp);
tcg_gen_st_i64(tmp, cpu_env, poff);
- tcg_temp_free_i64(tmp);
}
t_pg = tcg_temp_new_ptr();
= ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
- tcg_temp_free_ptr(t_pg);
-
/* Replicate that first quadword. */
if (vsz > 16) {
int doff = vec_full_reg_offset(s, zt);
}
if (sve_access_check(s)) {
int msz = dtype_msz(a->dtype);
- TCGv_i64 addr = new_tmp_a64(s);
+ TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
do_ldrq(s, a->rd, a->pg, addr, a->dtype);
return false;
}
if (sve_access_check(s)) {
- TCGv_i64 addr = new_tmp_a64(s);
+ TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
do_ldrq(s, a->rd, a->pg, addr, a->dtype);
}
poff = offsetof(CPUARMState, vfp.preg_tmp);
tcg_gen_st_i64(tmp, cpu_env, poff);
- tcg_temp_free_i64(tmp);
}
t_pg = tcg_temp_new_ptr();
= ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
- tcg_temp_free_ptr(t_pg);
-
/*
* Replicate that first octaword.
* The replication happens in units of 32; if the full vector size
}
s->is_nonstreaming = true;
if (sve_access_check(s)) {
- TCGv_i64 addr = new_tmp_a64(s);
+ TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
do_ldro(s, a->rd, a->pg, addr, a->dtype);
}
s->is_nonstreaming = true;
if (sve_access_check(s)) {
- TCGv_i64 addr = new_tmp_a64(s);
+ TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
do_ldro(s, a->rd, a->pg, addr, a->dtype);
}
unsigned msz = dtype_msz(a->dtype);
TCGLabel *over;
TCGv_i64 temp, clean_addr;
+ MemOp memop;
if (!dc_isar_feature(aa64_sve, s)) {
return false;
tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
- tcg_temp_free_i64(temp);
} else {
TCGv_i32 t32 = tcg_temp_new_i32();
find_last_active(s, t32, esz, a->pg);
tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
- tcg_temp_free_i32(t32);
}
/* Load the data. */
temp = tcg_temp_new_i64();
tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
- clean_addr = gen_mte_check1(s, temp, false, true, msz);
- tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
- finalize_memop(s, dtype_mop[a->dtype]));
+ memop = finalize_memop(s, dtype_mop[a->dtype]);
+ clean_addr = gen_mte_check1(s, temp, false, true, memop);
+ tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop);
/* Broadcast to *all* elements. */
tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
vsz, vsz, temp);
- tcg_temp_free_i64(temp);
/* Zero the inactive elements. */
gen_set_label(over);
return false;
}
if (sve_access_check(s)) {
- TCGv_i64 addr = new_tmp_a64(s);
+ TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
if (sve_access_check(s)) {
int vsz = vec_full_reg_size(s);
int elements = vsz >> a->esz;
- TCGv_i64 addr = new_tmp_a64(s);
+ TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
(a->imm * elements * (a->nreg + 1)) << a->msz);
tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
-
- tcg_temp_free_ptr(t_zt);
- tcg_temp_free_ptr(t_zm);
- tcg_temp_free_ptr(t_pg);
}
/* Indexed by [mte][be][ff][xs][u][msz]. */
TCGv_vec t = tcg_temp_new_vec_matching(d);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
tcg_gen_and_vec(vece, d, n, t);
- tcg_temp_free_vec(t);
} else {
tcg_gen_sari_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
TCGv_vec t = tcg_temp_new_vec_matching(d);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
tcg_gen_and_vec(vece, d, n, t);
- tcg_temp_free_vec(t);
} else {
tcg_gen_shri_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
TCGv_vec t = tcg_temp_new_vec_matching(d);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_and_vec(vece, d, n, t);
- tcg_temp_free_vec(t);
} else {
tcg_gen_shli_vec(vece, d, n, halfbits);
tcg_gen_shri_vec(vece, d, d, halfbits - shl);
tcg_gen_smin_vec(vece, d, d, t);
tcg_gen_dupi_vec(vece, t, mask);
tcg_gen_and_vec(vece, d, d, t);
- tcg_temp_free_vec(t);
}
static const GVecGen2 sqxtnb_ops[3] = {
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_dupi_vec(vece, t, mask);
tcg_gen_bitsel_vec(vece, d, t, d, n);
- tcg_temp_free_vec(t);
}
static const GVecGen2 sqxtnt_ops[3] = {
tcg_gen_dupi_vec(vece, t, max);
tcg_gen_umin_vec(vece, d, n, t);
- tcg_temp_free_vec(t);
}
static const GVecGen2 uqxtnb_ops[3] = {
tcg_gen_umin_vec(vece, n, n, t);
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_bitsel_vec(vece, d, t, d, n);
- tcg_temp_free_vec(t);
}
static const GVecGen2 uqxtnt_ops[3] = {
tcg_gen_smax_vec(vece, d, n, t);
tcg_gen_dupi_vec(vece, t, max);
tcg_gen_umin_vec(vece, d, d, t);
- tcg_temp_free_vec(t);
}
static const GVecGen2 sqxtunb_ops[3] = {
tcg_gen_umin_vec(vece, n, n, t);
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_bitsel_vec(vece, d, t, d, n);
- tcg_temp_free_vec(t);
}
static const GVecGen2 sqxtunt_ops[3] = {
tcg_gen_shri_vec(vece, n, n, shr);
tcg_gen_dupi_vec(vece, t, mask);
tcg_gen_and_vec(vece, d, n, t);
- tcg_temp_free_vec(t);
}
static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
tcg_gen_shli_vec(vece, n, n, halfbits - shr);
tcg_gen_dupi_vec(vece, t, mask);
tcg_gen_bitsel_vec(vece, d, t, d, n);
- tcg_temp_free_vec(t);
}
static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
tcg_gen_smax_vec(vece, n, n, t);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_umin_vec(vece, d, n, t);
- tcg_temp_free_vec(t);
}
static const TCGOpcode sqshrunb_vec_list[] = {
tcg_gen_umin_vec(vece, n, n, t);
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_bitsel_vec(vece, d, t, d, n);
- tcg_temp_free_vec(t);
}
static const TCGOpcode sqshrunt_vec_list[] = {
tcg_gen_smin_vec(vece, n, n, t);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_and_vec(vece, d, n, t);
- tcg_temp_free_vec(t);
}
static const TCGOpcode sqshrnb_vec_list[] = {
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_bitsel_vec(vece, d, t, d, n);
- tcg_temp_free_vec(t);
}
static const TCGOpcode sqshrnt_vec_list[] = {
tcg_gen_shri_vec(vece, n, n, shr);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_umin_vec(vece, d, n, t);
- tcg_temp_free_vec(t);
}
static const TCGOpcode uqshrnb_vec_list[] = {
tcg_gen_umin_vec(vece, n, n, t);
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_bitsel_vec(vece, d, t, d, n);
- tcg_temp_free_vec(t);
}
static const TCGOpcode uqshrnt_vec_list[] = {
gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
- float_round_to_odd, gen_helper_sve_fcvt_ds)
+ FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
- float_round_to_odd, gen_helper_sve2_fcvtnt_ds)
+ FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds)
static gen_helper_gvec_3_ptr * const flogb_fns[] = {
NULL, gen_helper_flogb_h,
/* Apply to either copy the source, or write zeros. */
tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
pred_full_reg_offset(s, a->pn), tmp, pl, pl);
-
- tcg_temp_free_i64(tmp);
- tcg_temp_free_i64(dbit);
- tcg_temp_free_i64(didx);
- tcg_temp_free_ptr(ptr);
return true;
}