2 * ARM translation: M-profile MVE instructions
4 * Copyright (c) 2021 Linaro, Ltd.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/exec-all.h"
24 #include "translate.h"
25 #include "translate-a32.h"
27 static inline int vidup_imm(DisasContext
*s
, int x
)
32 /* Include the generated decoder */
33 #include "decode-mve.c.inc"
/*
 * Function types for the code-generation callbacks used by the translation
 * routines in this file (for example the "fn" argument of do_ldst()).
 */
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                   TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32,
                            TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr,
                               TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
53 /* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
54 static inline long mve_qreg_offset(unsigned reg
)
56 return offsetof(CPUARMState
, vfp
.zregs
[reg
].d
[0]);
59 static TCGv_ptr
mve_qreg_ptr(unsigned reg
)
61 TCGv_ptr ret
= tcg_temp_new_ptr();
62 tcg_gen_addi_ptr(ret
, cpu_env
, mve_qreg_offset(reg
));
66 static bool mve_no_predication(DisasContext
*s
)
69 * Return true if we are executing the entire MVE instruction
70 * with no predication or partial-execution, and so we can safely
71 * use an inline TCG vector implementation.
73 return s
->eci
== 0 && s
->mve_no_pred
;
76 static bool mve_check_qreg_bank(DisasContext
*s
, int qmask
)
79 * Check whether Qregs are in range. For v8.1M only Q0..Q7
80 * are supported, see VFPSmallRegisterBank().
85 bool mve_eci_check(DisasContext
*s
)
88 * This is a beatwise insn: check that ECI is valid (not a
89 * reserved value) and note that we are handling it.
90 * Return true if OK, false if we generated an exception.
92 s
->eci_handled
= true;
101 /* Reserved value: INVSTATE UsageFault */
102 gen_exception_insn(s
, 0, EXCP_INVSTATE
, syn_uncategorized());
107 void mve_update_eci(DisasContext
*s
)
110 * The helper function will always update the CPUState field,
111 * so we only need to update the DisasContext field.
114 s
->eci
= (s
->eci
== ECI_A0A1A2B0
) ? ECI_A0
: ECI_NONE
;
118 void mve_update_and_store_eci(DisasContext
*s
)
121 * For insns which don't call a helper function that will call
122 * mve_advance_vpt(), this version updates s->eci and also stores
123 * it out to the CPUState field.
127 store_cpu_field(tcg_constant_i32(s
->eci
<< 4), condexec_bits
);
131 static bool mve_skip_first_beat(DisasContext
*s
)
133 /* Return true if PSR.ECI says we must skip the first beat of this insn */
143 g_assert_not_reached();
147 static bool do_ldst(DisasContext
*s
, arg_VLDR_VSTR
*a
, MVEGenLdStFn
*fn
,
154 if (!dc_isar_feature(aa32_mve
, s
) ||
155 !mve_check_qreg_bank(s
, a
->qd
) ||
160 /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
161 if (a
->rn
== 15 || (a
->rn
== 13 && a
->w
)) {
165 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
169 offset
= a
->imm
<< msize
;
173 addr
= load_reg(s
, a
->rn
);
175 tcg_gen_addi_i32(addr
, addr
, offset
);
178 qreg
= mve_qreg_ptr(a
->qd
);
179 fn(cpu_env
, qreg
, addr
);
182 * Writeback always happens after the last beat of the insn,
183 * regardless of predication
187 tcg_gen_addi_i32(addr
, addr
, offset
);
189 store_reg(s
, a
->rn
, addr
);
195 static bool trans_VLDR_VSTR(DisasContext
*s
, arg_VLDR_VSTR
*a
)
197 static MVEGenLdStFn
* const ldstfns
[4][2] = {
198 { gen_helper_mve_vstrb
, gen_helper_mve_vldrb
},
199 { gen_helper_mve_vstrh
, gen_helper_mve_vldrh
},
200 { gen_helper_mve_vstrw
, gen_helper_mve_vldrw
},
203 return do_ldst(s
, a
, ldstfns
[a
->size
][a
->l
], a
->size
);
206 #define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE) \
207 static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a) \
209 static MVEGenLdStFn * const ldstfns[2][2] = { \
210 { gen_helper_mve_##ST, gen_helper_mve_##SLD }, \
211 { NULL, gen_helper_mve_##ULD }, \
213 return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE); \
216 DO_VLDST_WIDE_NARROW(VLDSTB_H
, vldrb_sh
, vldrb_uh
, vstrb_h
, MO_8
)
217 DO_VLDST_WIDE_NARROW(VLDSTB_W
, vldrb_sw
, vldrb_uw
, vstrb_w
, MO_8
)
218 DO_VLDST_WIDE_NARROW(VLDSTH_W
, vldrh_sw
, vldrh_uw
, vstrh_w
, MO_16
)
220 static bool do_ldst_sg(DisasContext
*s
, arg_vldst_sg
*a
, MVEGenLdStSGFn fn
)
225 if (!dc_isar_feature(aa32_mve
, s
) ||
226 !mve_check_qreg_bank(s
, a
->qd
| a
->qm
) ||
227 !fn
|| a
->rn
== 15) {
228 /* Rn case is UNPREDICTABLE */
232 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
236 addr
= load_reg(s
, a
->rn
);
238 qd
= mve_qreg_ptr(a
->qd
);
239 qm
= mve_qreg_ptr(a
->qm
);
240 fn(cpu_env
, qd
, qm
, addr
);
246 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
247 * signextended to halfword elements in register". _os_ indicates that
248 * the offsets in Qm should be scaled by the element size.
250 /* This macro is just to make the arrays more compact in these functions */
251 #define F(N) gen_helper_mve_##N
/*
 * VLDRB/VSTRB (ie byte-sized memory accesses, a->msize == 0) with OS=1
 * is UNPREDICTABLE; we UNDEF
 */
254 static bool trans_VLDR_S_sg(DisasContext
*s
, arg_vldst_sg
*a
)
256 static MVEGenLdStSGFn
* const fns
[2][4][4] = { {
257 { NULL
, F(vldrb_sg_sh
), F(vldrb_sg_sw
), NULL
},
258 { NULL
, NULL
, F(vldrh_sg_sw
), NULL
},
259 { NULL
, NULL
, NULL
, NULL
},
260 { NULL
, NULL
, NULL
, NULL
}
262 { NULL
, NULL
, NULL
, NULL
},
263 { NULL
, NULL
, F(vldrh_sg_os_sw
), NULL
},
264 { NULL
, NULL
, NULL
, NULL
},
265 { NULL
, NULL
, NULL
, NULL
}
268 if (a
->qd
== a
->qm
) {
269 return false; /* UNPREDICTABLE */
271 return do_ldst_sg(s
, a
, fns
[a
->os
][a
->msize
][a
->size
]);
274 static bool trans_VLDR_U_sg(DisasContext
*s
, arg_vldst_sg
*a
)
276 static MVEGenLdStSGFn
* const fns
[2][4][4] = { {
277 { F(vldrb_sg_ub
), F(vldrb_sg_uh
), F(vldrb_sg_uw
), NULL
},
278 { NULL
, F(vldrh_sg_uh
), F(vldrh_sg_uw
), NULL
},
279 { NULL
, NULL
, F(vldrw_sg_uw
), NULL
},
280 { NULL
, NULL
, NULL
, F(vldrd_sg_ud
) }
282 { NULL
, NULL
, NULL
, NULL
},
283 { NULL
, F(vldrh_sg_os_uh
), F(vldrh_sg_os_uw
), NULL
},
284 { NULL
, NULL
, F(vldrw_sg_os_uw
), NULL
},
285 { NULL
, NULL
, NULL
, F(vldrd_sg_os_ud
) }
288 if (a
->qd
== a
->qm
) {
289 return false; /* UNPREDICTABLE */
291 return do_ldst_sg(s
, a
, fns
[a
->os
][a
->msize
][a
->size
]);
294 static bool trans_VSTR_sg(DisasContext
*s
, arg_vldst_sg
*a
)
296 static MVEGenLdStSGFn
* const fns
[2][4][4] = { {
297 { F(vstrb_sg_ub
), F(vstrb_sg_uh
), F(vstrb_sg_uw
), NULL
},
298 { NULL
, F(vstrh_sg_uh
), F(vstrh_sg_uw
), NULL
},
299 { NULL
, NULL
, F(vstrw_sg_uw
), NULL
},
300 { NULL
, NULL
, NULL
, F(vstrd_sg_ud
) }
302 { NULL
, NULL
, NULL
, NULL
},
303 { NULL
, F(vstrh_sg_os_uh
), F(vstrh_sg_os_uw
), NULL
},
304 { NULL
, NULL
, F(vstrw_sg_os_uw
), NULL
},
305 { NULL
, NULL
, NULL
, F(vstrd_sg_os_ud
) }
308 return do_ldst_sg(s
, a
, fns
[a
->os
][a
->msize
][a
->size
]);
313 static bool do_ldst_sg_imm(DisasContext
*s
, arg_vldst_sg_imm
*a
,
314 MVEGenLdStSGFn
*fn
, unsigned msize
)
319 if (!dc_isar_feature(aa32_mve
, s
) ||
320 !mve_check_qreg_bank(s
, a
->qd
| a
->qm
) ||
325 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
329 offset
= a
->imm
<< msize
;
334 qd
= mve_qreg_ptr(a
->qd
);
335 qm
= mve_qreg_ptr(a
->qm
);
336 fn(cpu_env
, qd
, qm
, tcg_constant_i32(offset
));
341 static bool trans_VLDRW_sg_imm(DisasContext
*s
, arg_vldst_sg_imm
*a
)
343 static MVEGenLdStSGFn
* const fns
[] = {
344 gen_helper_mve_vldrw_sg_uw
,
345 gen_helper_mve_vldrw_sg_wb_uw
,
347 if (a
->qd
== a
->qm
) {
348 return false; /* UNPREDICTABLE */
350 return do_ldst_sg_imm(s
, a
, fns
[a
->w
], MO_32
);
353 static bool trans_VLDRD_sg_imm(DisasContext
*s
, arg_vldst_sg_imm
*a
)
355 static MVEGenLdStSGFn
* const fns
[] = {
356 gen_helper_mve_vldrd_sg_ud
,
357 gen_helper_mve_vldrd_sg_wb_ud
,
359 if (a
->qd
== a
->qm
) {
360 return false; /* UNPREDICTABLE */
362 return do_ldst_sg_imm(s
, a
, fns
[a
->w
], MO_64
);
365 static bool trans_VSTRW_sg_imm(DisasContext
*s
, arg_vldst_sg_imm
*a
)
367 static MVEGenLdStSGFn
* const fns
[] = {
368 gen_helper_mve_vstrw_sg_uw
,
369 gen_helper_mve_vstrw_sg_wb_uw
,
371 return do_ldst_sg_imm(s
, a
, fns
[a
->w
], MO_32
);
374 static bool trans_VSTRD_sg_imm(DisasContext
*s
, arg_vldst_sg_imm
*a
)
376 static MVEGenLdStSGFn
* const fns
[] = {
377 gen_helper_mve_vstrd_sg_ud
,
378 gen_helper_mve_vstrd_sg_wb_ud
,
380 return do_ldst_sg_imm(s
, a
, fns
[a
->w
], MO_64
);
383 static bool do_vldst_il(DisasContext
*s
, arg_vldst_il
*a
, MVEGenLdStIlFn
*fn
,
388 if (!dc_isar_feature(aa32_mve
, s
) ||
389 !mve_check_qreg_bank(s
, a
->qd
) ||
390 !fn
|| (a
->rn
== 13 && a
->w
) || a
->rn
== 15) {
391 /* Variously UNPREDICTABLE or UNDEF or related-encoding */
394 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
398 rn
= load_reg(s
, a
->rn
);
400 * We pass the index of Qd, not a pointer, because the helper must
401 * access multiple Q registers starting at Qd and working up.
403 fn(cpu_env
, tcg_constant_i32(a
->qd
), rn
);
406 tcg_gen_addi_i32(rn
, rn
, addrinc
);
407 store_reg(s
, a
->rn
, rn
);
409 mve_update_and_store_eci(s
);
413 /* This macro is just to make the arrays more compact in these functions */
414 #define F(N) gen_helper_mve_##N
416 static bool trans_VLD2(DisasContext
*s
, arg_vldst_il
*a
)
418 static MVEGenLdStIlFn
* const fns
[4][4] = {
419 { F(vld20b
), F(vld20h
), F(vld20w
), NULL
, },
420 { F(vld21b
), F(vld21h
), F(vld21w
), NULL
, },
421 { NULL
, NULL
, NULL
, NULL
},
422 { NULL
, NULL
, NULL
, NULL
},
427 return do_vldst_il(s
, a
, fns
[a
->pat
][a
->size
], 32);
430 static bool trans_VLD4(DisasContext
*s
, arg_vldst_il
*a
)
432 static MVEGenLdStIlFn
* const fns
[4][4] = {
433 { F(vld40b
), F(vld40h
), F(vld40w
), NULL
, },
434 { F(vld41b
), F(vld41h
), F(vld41w
), NULL
, },
435 { F(vld42b
), F(vld42h
), F(vld42w
), NULL
, },
436 { F(vld43b
), F(vld43h
), F(vld43w
), NULL
, },
441 return do_vldst_il(s
, a
, fns
[a
->pat
][a
->size
], 64);
444 static bool trans_VST2(DisasContext
*s
, arg_vldst_il
*a
)
446 static MVEGenLdStIlFn
* const fns
[4][4] = {
447 { F(vst20b
), F(vst20h
), F(vst20w
), NULL
, },
448 { F(vst21b
), F(vst21h
), F(vst21w
), NULL
, },
449 { NULL
, NULL
, NULL
, NULL
},
450 { NULL
, NULL
, NULL
, NULL
},
455 return do_vldst_il(s
, a
, fns
[a
->pat
][a
->size
], 32);
458 static bool trans_VST4(DisasContext
*s
, arg_vldst_il
*a
)
460 static MVEGenLdStIlFn
* const fns
[4][4] = {
461 { F(vst40b
), F(vst40h
), F(vst40w
), NULL
, },
462 { F(vst41b
), F(vst41h
), F(vst41w
), NULL
, },
463 { F(vst42b
), F(vst42h
), F(vst42w
), NULL
, },
464 { F(vst43b
), F(vst43h
), F(vst43w
), NULL
, },
469 return do_vldst_il(s
, a
, fns
[a
->pat
][a
->size
], 64);
474 static bool trans_VDUP(DisasContext
*s
, arg_VDUP
*a
)
479 if (!dc_isar_feature(aa32_mve
, s
) ||
480 !mve_check_qreg_bank(s
, a
->qd
)) {
483 if (a
->rt
== 13 || a
->rt
== 15) {
484 /* UNPREDICTABLE; we choose to UNDEF */
487 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
491 rt
= load_reg(s
, a
->rt
);
492 if (mve_no_predication(s
)) {
493 tcg_gen_gvec_dup_i32(a
->size
, mve_qreg_offset(a
->qd
), 16, 16, rt
);
495 qd
= mve_qreg_ptr(a
->qd
);
496 tcg_gen_dup_i32(a
->size
, rt
, rt
);
497 gen_helper_mve_vdup(cpu_env
, qd
, rt
);
503 static bool do_1op_vec(DisasContext
*s
, arg_1op
*a
, MVEGenOneOpFn fn
,
508 if (!dc_isar_feature(aa32_mve
, s
) ||
509 !mve_check_qreg_bank(s
, a
->qd
| a
->qm
) ||
514 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
518 if (vecfn
&& mve_no_predication(s
)) {
519 vecfn(a
->size
, mve_qreg_offset(a
->qd
), mve_qreg_offset(a
->qm
), 16, 16);
521 qd
= mve_qreg_ptr(a
->qd
);
522 qm
= mve_qreg_ptr(a
->qm
);
529 static bool do_1op(DisasContext
*s
, arg_1op
*a
, MVEGenOneOpFn fn
)
531 return do_1op_vec(s
, a
, fn
, NULL
);
534 #define DO_1OP_VEC(INSN, FN, VECFN) \
535 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
537 static MVEGenOneOpFn * const fns[] = { \
538 gen_helper_mve_##FN##b, \
539 gen_helper_mve_##FN##h, \
540 gen_helper_mve_##FN##w, \
543 return do_1op_vec(s, a, fns[a->size], VECFN); \
546 #define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)
550 DO_1OP_VEC(VABS
, vabs
, tcg_gen_gvec_abs
)
551 DO_1OP_VEC(VNEG
, vneg
, tcg_gen_gvec_neg
)
558 * For simple float/int conversions we use the fixed-point
559 * conversion helpers with a zero shift count
561 #define DO_VCVT(INSN, HFN, SFN) \
562 static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
564 gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0)); \
566 static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
568 gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0)); \
570 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
572 static MVEGenOneOpFn * const fns[] = { \
578 if (!dc_isar_feature(aa32_mve_fp, s)) { \
581 return do_1op(s, a, fns[a->size]); \
584 DO_VCVT(VCVT_SF
, vcvt_sh
, vcvt_sf
)
585 DO_VCVT(VCVT_UF
, vcvt_uh
, vcvt_uf
)
586 DO_VCVT(VCVT_FS
, vcvt_hs
, vcvt_fs
)
587 DO_VCVT(VCVT_FU
, vcvt_hu
, vcvt_fu
)
589 static bool do_vcvt_rmode(DisasContext
*s
, arg_1op
*a
,
590 ARMFPRounding rmode
, bool u
)
593 * Handle VCVT fp to int with specified rounding mode.
594 * This is a 1op fn but we must pass the rounding mode as
595 * an immediate to the helper.
598 static MVEGenVCVTRmodeFn
* const fns
[4][2] = {
600 { gen_helper_mve_vcvt_rm_sh
, gen_helper_mve_vcvt_rm_uh
},
601 { gen_helper_mve_vcvt_rm_ss
, gen_helper_mve_vcvt_rm_us
},
604 MVEGenVCVTRmodeFn
*fn
= fns
[a
->size
][u
];
606 if (!dc_isar_feature(aa32_mve_fp
, s
) ||
607 !mve_check_qreg_bank(s
, a
->qd
| a
->qm
) ||
612 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
616 qd
= mve_qreg_ptr(a
->qd
);
617 qm
= mve_qreg_ptr(a
->qm
);
618 fn(cpu_env
, qd
, qm
, tcg_constant_i32(arm_rmode_to_sf(rmode
)));
623 #define DO_VCVT_RMODE(INSN, RMODE, U) \
624 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
626 return do_vcvt_rmode(s, a, RMODE, U); \
629 DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
630 DO_VCVT_RMODE(VCVTAU
, FPROUNDING_TIEAWAY
, true)
631 DO_VCVT_RMODE(VCVTNS
, FPROUNDING_TIEEVEN
, false)
632 DO_VCVT_RMODE(VCVTNU
, FPROUNDING_TIEEVEN
, true)
633 DO_VCVT_RMODE(VCVTPS
, FPROUNDING_POSINF
, false)
634 DO_VCVT_RMODE(VCVTPU
, FPROUNDING_POSINF
, true)
635 DO_VCVT_RMODE(VCVTMS
, FPROUNDING_NEGINF
, false)
636 DO_VCVT_RMODE(VCVTMU
, FPROUNDING_NEGINF
, true)
638 #define DO_VCVT_SH(INSN, FN) \
639 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
641 if (!dc_isar_feature(aa32_mve_fp, s)) { \
644 return do_1op(s, a, gen_helper_mve_##FN); \
647 DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
648 DO_VCVT_SH(VCVTT_SH
, vcvtt_sh
)
649 DO_VCVT_SH(VCVTB_HS
, vcvtb_hs
)
650 DO_VCVT_SH(VCVTT_HS
, vcvtt_hs
)
652 #define DO_VRINT(INSN, RMODE) \
653 static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
655 gen_helper_mve_vrint_rm_h(env, qd, qm, \
656 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
658 static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
660 gen_helper_mve_vrint_rm_s(env, qd, qm, \
661 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
663 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
665 static MVEGenOneOpFn * const fns[] = { \
671 if (!dc_isar_feature(aa32_mve_fp, s)) { \
674 return do_1op(s, a, fns[a->size]); \
677 DO_VRINT(VRINTN
, FPROUNDING_TIEEVEN
)
678 DO_VRINT(VRINTA
, FPROUNDING_TIEAWAY
)
679 DO_VRINT(VRINTZ
, FPROUNDING_ZERO
)
680 DO_VRINT(VRINTM
, FPROUNDING_NEGINF
)
681 DO_VRINT(VRINTP
, FPROUNDING_POSINF
)
683 static bool trans_VRINTX(DisasContext
*s
, arg_1op
*a
)
685 static MVEGenOneOpFn
* const fns
[] = {
687 gen_helper_mve_vrintx_h
,
688 gen_helper_mve_vrintx_s
,
691 if (!dc_isar_feature(aa32_mve_fp
, s
)) {
694 return do_1op(s
, a
, fns
[a
->size
]);
697 /* Narrowing moves: only size 0 and 1 are valid */
698 #define DO_VMOVN(INSN, FN) \
699 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
701 static MVEGenOneOpFn * const fns[] = { \
702 gen_helper_mve_##FN##b, \
703 gen_helper_mve_##FN##h, \
707 return do_1op(s, a, fns[a->size]); \
710 DO_VMOVN(VMOVNB
, vmovnb
)
711 DO_VMOVN(VMOVNT
, vmovnt
)
712 DO_VMOVN(VQMOVUNB
, vqmovunb
)
713 DO_VMOVN(VQMOVUNT
, vqmovunt
)
714 DO_VMOVN(VQMOVN_BS
, vqmovnbs
)
715 DO_VMOVN(VQMOVN_TS
, vqmovnts
)
716 DO_VMOVN(VQMOVN_BU
, vqmovnbu
)
717 DO_VMOVN(VQMOVN_TU
, vqmovntu
)
719 static bool trans_VREV16(DisasContext
*s
, arg_1op
*a
)
721 static MVEGenOneOpFn
* const fns
[] = {
722 gen_helper_mve_vrev16b
,
727 return do_1op(s
, a
, fns
[a
->size
]);
730 static bool trans_VREV32(DisasContext
*s
, arg_1op
*a
)
732 static MVEGenOneOpFn
* const fns
[] = {
733 gen_helper_mve_vrev32b
,
734 gen_helper_mve_vrev32h
,
738 return do_1op(s
, a
, fns
[a
->size
]);
741 static bool trans_VREV64(DisasContext
*s
, arg_1op
*a
)
743 static MVEGenOneOpFn
* const fns
[] = {
744 gen_helper_mve_vrev64b
,
745 gen_helper_mve_vrev64h
,
746 gen_helper_mve_vrev64w
,
749 return do_1op(s
, a
, fns
[a
->size
]);
752 static bool trans_VMVN(DisasContext
*s
, arg_1op
*a
)
754 return do_1op_vec(s
, a
, gen_helper_mve_vmvn
, tcg_gen_gvec_not
);
757 static bool trans_VABS_fp(DisasContext
*s
, arg_1op
*a
)
759 static MVEGenOneOpFn
* const fns
[] = {
761 gen_helper_mve_vfabsh
,
762 gen_helper_mve_vfabss
,
765 if (!dc_isar_feature(aa32_mve_fp
, s
)) {
768 return do_1op(s
, a
, fns
[a
->size
]);
771 static bool trans_VNEG_fp(DisasContext
*s
, arg_1op
*a
)
773 static MVEGenOneOpFn
* const fns
[] = {
775 gen_helper_mve_vfnegh
,
776 gen_helper_mve_vfnegs
,
779 if (!dc_isar_feature(aa32_mve_fp
, s
)) {
782 return do_1op(s
, a
, fns
[a
->size
]);
785 static bool do_2op_vec(DisasContext
*s
, arg_2op
*a
, MVEGenTwoOpFn fn
,
790 if (!dc_isar_feature(aa32_mve
, s
) ||
791 !mve_check_qreg_bank(s
, a
->qd
| a
->qn
| a
->qm
) ||
795 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
799 if (vecfn
&& mve_no_predication(s
)) {
800 vecfn(a
->size
, mve_qreg_offset(a
->qd
), mve_qreg_offset(a
->qn
),
801 mve_qreg_offset(a
->qm
), 16, 16);
803 qd
= mve_qreg_ptr(a
->qd
);
804 qn
= mve_qreg_ptr(a
->qn
);
805 qm
= mve_qreg_ptr(a
->qm
);
806 fn(cpu_env
, qd
, qn
, qm
);
812 static bool do_2op(DisasContext
*s
, arg_2op
*a
, MVEGenTwoOpFn
*fn
)
814 return do_2op_vec(s
, a
, fn
, NULL
);
817 #define DO_LOGIC(INSN, HELPER, VECFN) \
818 static bool trans_##INSN(DisasContext *s, arg_2op *a) \
820 return do_2op_vec(s, a, HELPER, VECFN); \
823 DO_LOGIC(VAND
, gen_helper_mve_vand
, tcg_gen_gvec_and
)
824 DO_LOGIC(VBIC
, gen_helper_mve_vbic
, tcg_gen_gvec_andc
)
825 DO_LOGIC(VORR
, gen_helper_mve_vorr
, tcg_gen_gvec_or
)
826 DO_LOGIC(VORN
, gen_helper_mve_vorn
, tcg_gen_gvec_orc
)
827 DO_LOGIC(VEOR
, gen_helper_mve_veor
, tcg_gen_gvec_xor
)
829 static bool trans_VPSEL(DisasContext
*s
, arg_2op
*a
)
831 /* This insn updates predication bits */
832 s
->base
.is_jmp
= DISAS_UPDATE_NOCHAIN
;
833 return do_2op(s
, a
, gen_helper_mve_vpsel
);
836 #define DO_2OP_VEC(INSN, FN, VECFN) \
837 static bool trans_##INSN(DisasContext *s, arg_2op *a) \
839 static MVEGenTwoOpFn * const fns[] = { \
840 gen_helper_mve_##FN##b, \
841 gen_helper_mve_##FN##h, \
842 gen_helper_mve_##FN##w, \
845 return do_2op_vec(s, a, fns[a->size], VECFN); \
848 #define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)
850 DO_2OP_VEC(VADD
, vadd
, tcg_gen_gvec_add
)
851 DO_2OP_VEC(VSUB
, vsub
, tcg_gen_gvec_sub
)
852 DO_2OP_VEC(VMUL
, vmul
, tcg_gen_gvec_mul
)
853 DO_2OP(VMULH_S
, vmulhs
)
854 DO_2OP(VMULH_U
, vmulhu
)
855 DO_2OP(VRMULH_S
, vrmulhs
)
856 DO_2OP(VRMULH_U
, vrmulhu
)
857 DO_2OP_VEC(VMAX_S
, vmaxs
, tcg_gen_gvec_smax
)
858 DO_2OP_VEC(VMAX_U
, vmaxu
, tcg_gen_gvec_umax
)
859 DO_2OP_VEC(VMIN_S
, vmins
, tcg_gen_gvec_smin
)
860 DO_2OP_VEC(VMIN_U
, vminu
, tcg_gen_gvec_umin
)
861 DO_2OP(VABD_S
, vabds
)
862 DO_2OP(VABD_U
, vabdu
)
863 DO_2OP(VHADD_S
, vhadds
)
864 DO_2OP(VHADD_U
, vhaddu
)
865 DO_2OP(VHSUB_S
, vhsubs
)
866 DO_2OP(VHSUB_U
, vhsubu
)
867 DO_2OP(VMULL_BS
, vmullbs
)
868 DO_2OP(VMULL_BU
, vmullbu
)
869 DO_2OP(VMULL_TS
, vmullts
)
870 DO_2OP(VMULL_TU
, vmulltu
)
871 DO_2OP(VQDMULH
, vqdmulh
)
872 DO_2OP(VQRDMULH
, vqrdmulh
)
873 DO_2OP(VQADD_S
, vqadds
)
874 DO_2OP(VQADD_U
, vqaddu
)
875 DO_2OP(VQSUB_S
, vqsubs
)
876 DO_2OP(VQSUB_U
, vqsubu
)
877 DO_2OP(VSHL_S
, vshls
)
878 DO_2OP(VSHL_U
, vshlu
)
879 DO_2OP(VRSHL_S
, vrshls
)
880 DO_2OP(VRSHL_U
, vrshlu
)
881 DO_2OP(VQSHL_S
, vqshls
)
882 DO_2OP(VQSHL_U
, vqshlu
)
883 DO_2OP(VQRSHL_S
, vqrshls
)
884 DO_2OP(VQRSHL_U
, vqrshlu
)
885 DO_2OP(VQDMLADH
, vqdmladh
)
886 DO_2OP(VQDMLADHX
, vqdmladhx
)
887 DO_2OP(VQRDMLADH
, vqrdmladh
)
888 DO_2OP(VQRDMLADHX
, vqrdmladhx
)
889 DO_2OP(VQDMLSDH
, vqdmlsdh
)
890 DO_2OP(VQDMLSDHX
, vqdmlsdhx
)
891 DO_2OP(VQRDMLSDH
, vqrdmlsdh
)
892 DO_2OP(VQRDMLSDHX
, vqrdmlsdhx
)
893 DO_2OP(VRHADD_S
, vrhadds
)
894 DO_2OP(VRHADD_U
, vrhaddu
)
896 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
897 * so we can reuse the DO_2OP macro. (Our implementation calculates the
898 * "expected" results in this case.) Similarly for VHCADD.
900 DO_2OP(VCADD90
, vcadd90
)
901 DO_2OP(VCADD270
, vcadd270
)
902 DO_2OP(VHCADD90
, vhcadd90
)
903 DO_2OP(VHCADD270
, vhcadd270
)
905 static bool trans_VQDMULLB(DisasContext
*s
, arg_2op
*a
)
907 static MVEGenTwoOpFn
* const fns
[] = {
909 gen_helper_mve_vqdmullbh
,
910 gen_helper_mve_vqdmullbw
,
913 if (a
->size
== MO_32
&& (a
->qd
== a
->qm
|| a
->qd
== a
->qn
)) {
914 /* UNPREDICTABLE; we choose to undef */
917 return do_2op(s
, a
, fns
[a
->size
]);
920 static bool trans_VQDMULLT(DisasContext
*s
, arg_2op
*a
)
922 static MVEGenTwoOpFn
* const fns
[] = {
924 gen_helper_mve_vqdmullth
,
925 gen_helper_mve_vqdmulltw
,
928 if (a
->size
== MO_32
&& (a
->qd
== a
->qm
|| a
->qd
== a
->qn
)) {
929 /* UNPREDICTABLE; we choose to undef */
932 return do_2op(s
, a
, fns
[a
->size
]);
935 static bool trans_VMULLP_B(DisasContext
*s
, arg_2op
*a
)
938 * Note that a->size indicates the output size, ie VMULL.P8
939 * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
940 * is the 16x16->32 operation and a->size is MO_32.
942 static MVEGenTwoOpFn
* const fns
[] = {
944 gen_helper_mve_vmullpbh
,
945 gen_helper_mve_vmullpbw
,
948 return do_2op(s
, a
, fns
[a
->size
]);
951 static bool trans_VMULLP_T(DisasContext
*s
, arg_2op
*a
)
953 /* a->size is as for trans_VMULLP_B */
954 static MVEGenTwoOpFn
* const fns
[] = {
956 gen_helper_mve_vmullpth
,
957 gen_helper_mve_vmullptw
,
960 return do_2op(s
, a
, fns
[a
->size
]);
964 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
965 * of the 32-bit elements in each lane of the input vectors, where the
966 * carry-out of each add is the carry-in of the next. The initial carry
967 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
968 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
969 * These insns are subject to beat-wise execution. Partial execution
970 * of an I=1 (initial carry input fixed) insn which does not
971 * execute the first beat must start with the current FPSCR.NZCV
972 * value, not the fixed constant input.
974 static bool trans_VADC(DisasContext
*s
, arg_2op
*a
)
976 return do_2op(s
, a
, gen_helper_mve_vadc
);
979 static bool trans_VADCI(DisasContext
*s
, arg_2op
*a
)
981 if (mve_skip_first_beat(s
)) {
982 return trans_VADC(s
, a
);
984 return do_2op(s
, a
, gen_helper_mve_vadci
);
987 static bool trans_VSBC(DisasContext
*s
, arg_2op
*a
)
989 return do_2op(s
, a
, gen_helper_mve_vsbc
);
992 static bool trans_VSBCI(DisasContext
*s
, arg_2op
*a
)
994 if (mve_skip_first_beat(s
)) {
995 return trans_VSBC(s
, a
);
997 return do_2op(s
, a
, gen_helper_mve_vsbci
);
1000 #define DO_2OP_FP(INSN, FN) \
1001 static bool trans_##INSN(DisasContext *s, arg_2op *a) \
1003 static MVEGenTwoOpFn * const fns[] = { \
1005 gen_helper_mve_##FN##h, \
1006 gen_helper_mve_##FN##s, \
1009 if (!dc_isar_feature(aa32_mve_fp, s)) { \
1012 return do_2op(s, a, fns[a->size]); \
1015 DO_2OP_FP(VADD_fp
, vfadd
)
1016 DO_2OP_FP(VSUB_fp
, vfsub
)
1017 DO_2OP_FP(VMUL_fp
, vfmul
)
1018 DO_2OP_FP(VABD_fp
, vfabd
)
1019 DO_2OP_FP(VMAXNM
, vmaxnm
)
1020 DO_2OP_FP(VMINNM
, vminnm
)
1021 DO_2OP_FP(VCADD90_fp
, vfcadd90
)
1022 DO_2OP_FP(VCADD270_fp
, vfcadd270
)
1023 DO_2OP_FP(VFMA
, vfma
)
1024 DO_2OP_FP(VFMS
, vfms
)
1025 DO_2OP_FP(VCMUL0
, vcmul0
)
1026 DO_2OP_FP(VCMUL90
, vcmul90
)
1027 DO_2OP_FP(VCMUL180
, vcmul180
)
1028 DO_2OP_FP(VCMUL270
, vcmul270
)
1029 DO_2OP_FP(VCMLA0
, vcmla0
)
1030 DO_2OP_FP(VCMLA90
, vcmla90
)
1031 DO_2OP_FP(VCMLA180
, vcmla180
)
1032 DO_2OP_FP(VCMLA270
, vcmla270
)
1033 DO_2OP_FP(VMAXNMA
, vmaxnma
)
1034 DO_2OP_FP(VMINNMA
, vminnma
)
1036 static bool do_2op_scalar(DisasContext
*s
, arg_2scalar
*a
,
1037 MVEGenTwoOpScalarFn fn
)
1042 if (!dc_isar_feature(aa32_mve
, s
) ||
1043 !mve_check_qreg_bank(s
, a
->qd
| a
->qn
) ||
1047 if (a
->rm
== 13 || a
->rm
== 15) {
1051 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1055 qd
= mve_qreg_ptr(a
->qd
);
1056 qn
= mve_qreg_ptr(a
->qn
);
1057 rm
= load_reg(s
, a
->rm
);
1058 fn(cpu_env
, qd
, qn
, rm
);
1063 #define DO_2OP_SCALAR(INSN, FN) \
1064 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \
1066 static MVEGenTwoOpScalarFn * const fns[] = { \
1067 gen_helper_mve_##FN##b, \
1068 gen_helper_mve_##FN##h, \
1069 gen_helper_mve_##FN##w, \
1072 return do_2op_scalar(s, a, fns[a->size]); \
1075 DO_2OP_SCALAR(VADD_scalar
, vadd_scalar
)
1076 DO_2OP_SCALAR(VSUB_scalar
, vsub_scalar
)
1077 DO_2OP_SCALAR(VMUL_scalar
, vmul_scalar
)
1078 DO_2OP_SCALAR(VHADD_S_scalar
, vhadds_scalar
)
1079 DO_2OP_SCALAR(VHADD_U_scalar
, vhaddu_scalar
)
1080 DO_2OP_SCALAR(VHSUB_S_scalar
, vhsubs_scalar
)
1081 DO_2OP_SCALAR(VHSUB_U_scalar
, vhsubu_scalar
)
1082 DO_2OP_SCALAR(VQADD_S_scalar
, vqadds_scalar
)
1083 DO_2OP_SCALAR(VQADD_U_scalar
, vqaddu_scalar
)
1084 DO_2OP_SCALAR(VQSUB_S_scalar
, vqsubs_scalar
)
1085 DO_2OP_SCALAR(VQSUB_U_scalar
, vqsubu_scalar
)
1086 DO_2OP_SCALAR(VQDMULH_scalar
, vqdmulh_scalar
)
1087 DO_2OP_SCALAR(VQRDMULH_scalar
, vqrdmulh_scalar
)
1088 DO_2OP_SCALAR(VBRSR
, vbrsr
)
1089 DO_2OP_SCALAR(VMLA
, vmla
)
1090 DO_2OP_SCALAR(VMLAS
, vmlas
)
1091 DO_2OP_SCALAR(VQDMLAH
, vqdmlah
)
1092 DO_2OP_SCALAR(VQRDMLAH
, vqrdmlah
)
1093 DO_2OP_SCALAR(VQDMLASH
, vqdmlash
)
1094 DO_2OP_SCALAR(VQRDMLASH
, vqrdmlash
)
1096 static bool trans_VQDMULLB_scalar(DisasContext
*s
, arg_2scalar
*a
)
1098 static MVEGenTwoOpScalarFn
* const fns
[] = {
1100 gen_helper_mve_vqdmullb_scalarh
,
1101 gen_helper_mve_vqdmullb_scalarw
,
1104 if (a
->qd
== a
->qn
&& a
->size
== MO_32
) {
1105 /* UNPREDICTABLE; we choose to undef */
1108 return do_2op_scalar(s
, a
, fns
[a
->size
]);
1111 static bool trans_VQDMULLT_scalar(DisasContext
*s
, arg_2scalar
*a
)
1113 static MVEGenTwoOpScalarFn
* const fns
[] = {
1115 gen_helper_mve_vqdmullt_scalarh
,
1116 gen_helper_mve_vqdmullt_scalarw
,
1119 if (a
->qd
== a
->qn
&& a
->size
== MO_32
) {
1120 /* UNPREDICTABLE; we choose to undef */
1123 return do_2op_scalar(s
, a
, fns
[a
->size
]);
1127 #define DO_2OP_FP_SCALAR(INSN, FN) \
1128 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \
1130 static MVEGenTwoOpScalarFn * const fns[] = { \
1132 gen_helper_mve_##FN##h, \
1133 gen_helper_mve_##FN##s, \
1136 if (!dc_isar_feature(aa32_mve_fp, s)) { \
1139 return do_2op_scalar(s, a, fns[a->size]); \
1142 DO_2OP_FP_SCALAR(VADD_fp_scalar
, vfadd_scalar
)
1143 DO_2OP_FP_SCALAR(VSUB_fp_scalar
, vfsub_scalar
)
1144 DO_2OP_FP_SCALAR(VMUL_fp_scalar
, vfmul_scalar
)
1145 DO_2OP_FP_SCALAR(VFMA_scalar
, vfma_scalar
)
1146 DO_2OP_FP_SCALAR(VFMAS_scalar
, vfmas_scalar
)
1148 static bool do_long_dual_acc(DisasContext
*s
, arg_vmlaldav
*a
,
1149 MVEGenLongDualAccOpFn
*fn
)
1152 TCGv_i64 rda_i
, rda_o
;
1153 TCGv_i32 rdalo
, rdahi
;
1155 if (!dc_isar_feature(aa32_mve
, s
) ||
1156 !mve_check_qreg_bank(s
, a
->qn
| a
->qm
) ||
1161 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
1162 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
1164 if (a
->rdahi
== 13 || a
->rdahi
== 15) {
1167 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1171 qn
= mve_qreg_ptr(a
->qn
);
1172 qm
= mve_qreg_ptr(a
->qm
);
1175 * This insn is subject to beat-wise execution. Partial execution
1176 * of an A=0 (no-accumulate) insn which does not execute the first
1177 * beat must start with the current rda value, not 0.
1179 rda_o
= tcg_temp_new_i64();
1180 if (a
->a
|| mve_skip_first_beat(s
)) {
1182 rdalo
= load_reg(s
, a
->rdalo
);
1183 rdahi
= load_reg(s
, a
->rdahi
);
1184 tcg_gen_concat_i32_i64(rda_i
, rdalo
, rdahi
);
1186 rda_i
= tcg_constant_i64(0);
1189 fn(rda_o
, cpu_env
, qn
, qm
, rda_i
);
1191 rdalo
= tcg_temp_new_i32();
1192 rdahi
= tcg_temp_new_i32();
1193 tcg_gen_extrl_i64_i32(rdalo
, rda_o
);
1194 tcg_gen_extrh_i64_i32(rdahi
, rda_o
);
1195 store_reg(s
, a
->rdalo
, rdalo
);
1196 store_reg(s
, a
->rdahi
, rdahi
);
1201 static bool trans_VMLALDAV_S(DisasContext
*s
, arg_vmlaldav
*a
)
1203 static MVEGenLongDualAccOpFn
* const fns
[4][2] = {
1205 { gen_helper_mve_vmlaldavsh
, gen_helper_mve_vmlaldavxsh
},
1206 { gen_helper_mve_vmlaldavsw
, gen_helper_mve_vmlaldavxsw
},
1209 return do_long_dual_acc(s
, a
, fns
[a
->size
][a
->x
]);
1212 static bool trans_VMLALDAV_U(DisasContext
*s
, arg_vmlaldav
*a
)
1214 static MVEGenLongDualAccOpFn
* const fns
[4][2] = {
1216 { gen_helper_mve_vmlaldavuh
, NULL
},
1217 { gen_helper_mve_vmlaldavuw
, NULL
},
1220 return do_long_dual_acc(s
, a
, fns
[a
->size
][a
->x
]);
1223 static bool trans_VMLSLDAV(DisasContext
*s
, arg_vmlaldav
*a
)
1225 static MVEGenLongDualAccOpFn
* const fns
[4][2] = {
1227 { gen_helper_mve_vmlsldavsh
, gen_helper_mve_vmlsldavxsh
},
1228 { gen_helper_mve_vmlsldavsw
, gen_helper_mve_vmlsldavxsw
},
1231 return do_long_dual_acc(s
, a
, fns
[a
->size
][a
->x
]);
1234 static bool trans_VRMLALDAVH_S(DisasContext
*s
, arg_vmlaldav
*a
)
1236 static MVEGenLongDualAccOpFn
* const fns
[] = {
1237 gen_helper_mve_vrmlaldavhsw
, gen_helper_mve_vrmlaldavhxsw
,
1239 return do_long_dual_acc(s
, a
, fns
[a
->x
]);
1242 static bool trans_VRMLALDAVH_U(DisasContext
*s
, arg_vmlaldav
*a
)
1244 static MVEGenLongDualAccOpFn
* const fns
[] = {
1245 gen_helper_mve_vrmlaldavhuw
, NULL
,
1247 return do_long_dual_acc(s
, a
, fns
[a
->x
]);
1250 static bool trans_VRMLSLDAVH(DisasContext
*s
, arg_vmlaldav
*a
)
1252 static MVEGenLongDualAccOpFn
* const fns
[] = {
1253 gen_helper_mve_vrmlsldavhsw
, gen_helper_mve_vrmlsldavhxsw
,
1255 return do_long_dual_acc(s
, a
, fns
[a
->x
]);
/*
 * Common code for the 32-bit dual-accumulate insns: calls
 * fn(rda_o, cpu_env, qn, qm, rda_i) and stores rda_o back to
 * general-purpose register a->rda.
 * NOTE(review): several short lines of this function are not visible in
 * this view; the comments below describe only what is shown.
 */
1258 static bool do_dual_acc(DisasContext
*s
, arg_vmladav
*a
, MVEGenDualAccOpFn
*fn
)
1261 TCGv_i32 rda_i
, rda_o
;
1263 if (!dc_isar_feature(aa32_mve
, s
) ||
/*
 * NOTE(review): only a->qn is bank-checked here, although a->qm is also
 * turned into a Q register pointer below; do_long_dual_acc() checks
 * a->qn | a->qm. Confirm whether a->qm should be included.
 */
1264 !mve_check_qreg_bank(s
, a
->qn
) ||
1268 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1272 qn
= mve_qreg_ptr(a
->qn
);
1273 qm
= mve_qreg_ptr(a
->qm
);
1276 * This insn is subject to beat-wise execution. Partial execution
1277 * of an A=0 (no-accumulate) insn which does not execute the first
1278 * beat must start with the current rda value, not 0.
1280 if (a
->a
|| mve_skip_first_beat(s
)) {
/* Accumulating case: the loaded rda temp is both input and output */
1281 rda_o
= rda_i
= load_reg(s
, a
->rda
);
/* Non-accumulating case: constant zero input, fresh output temp */
1283 rda_i
= tcg_constant_i32(0);
1284 rda_o
= tcg_temp_new_i32();
1287 fn(rda_o
, cpu_env
, qn
, qm
, rda_i
);
1288 store_reg(s
, a
->rda
, rda_o
);
1294 #define DO_DUAL_ACC(INSN, FN) \
1295 static bool trans_##INSN(DisasContext *s, arg_vmladav *a) \
1297 static MVEGenDualAccOpFn * const fns[4][2] = { \
1298 { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \
1299 { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \
1300 { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \
1303 return do_dual_acc(s, a, fns[a->size][a->x]); \
1306 DO_DUAL_ACC(VMLADAV_S
, vmladavs
)
1307 DO_DUAL_ACC(VMLSDAV
, vmlsdav
)
1309 static bool trans_VMLADAV_U(DisasContext
*s
, arg_vmladav
*a
)
1311 static MVEGenDualAccOpFn
* const fns
[4][2] = {
1312 { gen_helper_mve_vmladavub
, NULL
},
1313 { gen_helper_mve_vmladavuh
, NULL
},
1314 { gen_helper_mve_vmladavuw
, NULL
},
1317 return do_dual_acc(s
, a
, fns
[a
->size
][a
->x
]);
1320 static void gen_vpst(DisasContext
*s
, uint32_t mask
)
1323 * Set the VPR mask fields. We take advantage of MASK01 and MASK23
1324 * being adjacent fields in the register.
1326 * Updating the masks is not predicated, but it is subject to beat-wise
1327 * execution, and the mask is updated on the odd-numbered beats.
1328 * So if PSR.ECI says we should skip beat 1, we mustn't update the
1331 TCGv_i32 vpr
= load_cpu_field(v7m
.vpr
);
1335 /* Update both 01 and 23 fields */
1336 tcg_gen_deposit_i32(vpr
, vpr
,
1337 tcg_constant_i32(mask
| (mask
<< 4)),
1338 R_V7M_VPR_MASK01_SHIFT
,
1339 R_V7M_VPR_MASK01_LENGTH
+ R_V7M_VPR_MASK23_LENGTH
);
1344 /* Update only the 23 mask field */
1345 tcg_gen_deposit_i32(vpr
, vpr
,
1346 tcg_constant_i32(mask
),
1347 R_V7M_VPR_MASK23_SHIFT
, R_V7M_VPR_MASK23_LENGTH
);
1350 g_assert_not_reached();
1352 store_cpu_field(vpr
, v7m
.vpr
);
1355 static bool trans_VPST(DisasContext
*s
, arg_VPST
*a
)
1357 /* mask == 0 is a "related encoding" */
1358 if (!dc_isar_feature(aa32_mve
, s
) || !a
->mask
) {
1361 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1364 gen_vpst(s
, a
->mask
);
1365 mve_update_and_store_eci(s
);
1369 static bool trans_VPNOT(DisasContext
*s
, arg_VPNOT
*a
)
1372 * Invert the predicate in VPR.P0. We have call out to
1373 * a helper because this insn itself is beatwise and can
1376 if (!dc_isar_feature(aa32_mve
, s
)) {
1379 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1383 gen_helper_mve_vpnot(cpu_env
);
1384 /* This insn updates predication bits */
1385 s
->base
.is_jmp
= DISAS_UPDATE_NOCHAIN
;
1390 static bool trans_VADDV(DisasContext
*s
, arg_VADDV
*a
)
1392 /* VADDV: vector add across vector */
1393 static MVEGenVADDVFn
* const fns
[4][2] = {
1394 { gen_helper_mve_vaddvsb
, gen_helper_mve_vaddvub
},
1395 { gen_helper_mve_vaddvsh
, gen_helper_mve_vaddvuh
},
1396 { gen_helper_mve_vaddvsw
, gen_helper_mve_vaddvuw
},
1400 TCGv_i32 rda_i
, rda_o
;
1402 if (!dc_isar_feature(aa32_mve
, s
) ||
1406 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1411 * This insn is subject to beat-wise execution. Partial execution
1412 * of an A=0 (no-accumulate) insn which does not execute the first
1413 * beat must start with the current value of Rda, not zero.
1415 if (a
->a
|| mve_skip_first_beat(s
)) {
1416 /* Accumulate input from Rda */
1417 rda_o
= rda_i
= load_reg(s
, a
->rda
);
1419 /* Accumulate starting at zero */
1420 rda_i
= tcg_constant_i32(0);
1421 rda_o
= tcg_temp_new_i32();
1424 qm
= mve_qreg_ptr(a
->qm
);
1425 fns
[a
->size
][a
->u
](rda_o
, cpu_env
, qm
, rda_i
);
1426 store_reg(s
, a
->rda
, rda_o
);
1432 static bool trans_VADDLV(DisasContext
*s
, arg_VADDLV
*a
)
1435 * Vector Add Long Across Vector: accumulate the 32-bit
1436 * elements of the vector into a 64-bit result stored in
1437 * a pair of general-purpose registers.
1438 * No need to check Qm's bank: it is only 3 bits in decode.
1441 TCGv_i64 rda_i
, rda_o
;
1442 TCGv_i32 rdalo
, rdahi
;
1444 if (!dc_isar_feature(aa32_mve
, s
)) {
1448 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
1449 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
1451 if (a
->rdahi
== 13 || a
->rdahi
== 15) {
1454 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1459 * This insn is subject to beat-wise execution. Partial execution
1460 * of an A=0 (no-accumulate) insn which does not execute the first
1461 * beat must start with the current value of RdaHi:RdaLo, not zero.
1463 rda_o
= tcg_temp_new_i64();
1464 if (a
->a
|| mve_skip_first_beat(s
)) {
1465 /* Accumulate input from RdaHi:RdaLo */
1467 rdalo
= load_reg(s
, a
->rdalo
);
1468 rdahi
= load_reg(s
, a
->rdahi
);
1469 tcg_gen_concat_i32_i64(rda_i
, rdalo
, rdahi
);
1471 /* Accumulate starting at zero */
1472 rda_i
= tcg_constant_i64(0);
1475 qm
= mve_qreg_ptr(a
->qm
);
1477 gen_helper_mve_vaddlv_u(rda_o
, cpu_env
, qm
, rda_i
);
1479 gen_helper_mve_vaddlv_s(rda_o
, cpu_env
, qm
, rda_i
);
1482 rdalo
= tcg_temp_new_i32();
1483 rdahi
= tcg_temp_new_i32();
1484 tcg_gen_extrl_i64_i32(rdalo
, rda_o
);
1485 tcg_gen_extrh_i64_i32(rdahi
, rda_o
);
1486 store_reg(s
, a
->rdalo
, rdalo
);
1487 store_reg(s
, a
->rdahi
, rdahi
);
1492 static bool do_1imm(DisasContext
*s
, arg_1imm
*a
, MVEGenOneOpImmFn
*fn
,
1498 if (!dc_isar_feature(aa32_mve
, s
) ||
1499 !mve_check_qreg_bank(s
, a
->qd
) ||
1503 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1507 imm
= asimd_imm_const(a
->imm
, a
->cmode
, a
->op
);
1509 if (vecfn
&& mve_no_predication(s
)) {
1510 vecfn(MO_64
, mve_qreg_offset(a
->qd
), mve_qreg_offset(a
->qd
),
1513 qd
= mve_qreg_ptr(a
->qd
);
1514 fn(cpu_env
, qd
, tcg_constant_i64(imm
));
/*
 * Adapter giving tcg_gen_gvec_dup_imm() the (vece, dofs, aofs, c,
 * oprsz, maxsz) argument list used for the vecfn callbacks in this
 * file. The source offset @aofs is not used: the destination is
 * simply filled with the constant @c.
 */
static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
}
1526 static bool trans_Vimm_1r(DisasContext
*s
, arg_1imm
*a
)
1528 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1529 MVEGenOneOpImmFn
*fn
;
1532 if ((a
->cmode
& 1) && a
->cmode
< 12) {
1535 * For op=1, the immediate will be inverted by asimd_imm_const(),
1536 * so the VBIC becomes a logical AND operation.
1538 fn
= gen_helper_mve_vandi
;
1539 vecfn
= tcg_gen_gvec_andi
;
1541 fn
= gen_helper_mve_vorri
;
1542 vecfn
= tcg_gen_gvec_ori
;
1545 /* There is one unallocated cmode/op combination in this space */
1546 if (a
->cmode
== 15 && a
->op
== 1) {
1549 /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
1550 fn
= gen_helper_mve_vmovi
;
1551 vecfn
= gen_gvec_vmovi
;
1553 return do_1imm(s
, a
, fn
, vecfn
);
1556 static bool do_2shift_vec(DisasContext
*s
, arg_2shift
*a
, MVEGenTwoOpShiftFn fn
,
1557 bool negateshift
, GVecGen2iFn vecfn
)
1560 int shift
= a
->shift
;
1562 if (!dc_isar_feature(aa32_mve
, s
) ||
1563 !mve_check_qreg_bank(s
, a
->qd
| a
->qm
) ||
1567 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1572 * When we handle a right shift insn using a left-shift helper
1573 * which permits a negative shift count to indicate a right-shift,
1574 * we must negate the shift count.
1580 if (vecfn
&& mve_no_predication(s
)) {
1581 vecfn(a
->size
, mve_qreg_offset(a
->qd
), mve_qreg_offset(a
->qm
),
1584 qd
= mve_qreg_ptr(a
->qd
);
1585 qm
= mve_qreg_ptr(a
->qm
);
1586 fn(cpu_env
, qd
, qm
, tcg_constant_i32(shift
));
1592 static bool do_2shift(DisasContext
*s
, arg_2shift
*a
, MVEGenTwoOpShiftFn fn
,
1595 return do_2shift_vec(s
, a
, fn
, negateshift
, NULL
);
/*
 * Define trans_<INSN>() for a shift-by-immediate insn with byte,
 * halfword and word helpers (table indexed by a->size) and an
 * optional inline expander VECFN.
 */
#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const helpers[] = {                 \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_vec(s, a, helpers[a->size],                    \
                             NEGATESHIFT, VECFN);                       \
    }

/* As DO_2SHIFT_VEC, for insns with no inline expander. */
#define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
    DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)
/*
 * Inline expander for signed right shift by immediate.
 *
 * @shift arrives negated. A shift by the full element size must be
 * handled here because tcg_gen_gvec_sari() does not accept it; it is
 * emitted as a shift by one less than the element size.
 */
static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    int64_t count = -shift;

    if (count == (8 << vece)) {
        count = (8 << vece) - 1;
    }
    tcg_gen_gvec_sari(vece, dofs, aofs, count, oprsz, maxsz);
}
/*
 * Inline expander for unsigned right shift by immediate.
 *
 * @shift arrives negated. A shift by the full element size must be
 * handled here because tcg_gen_gvec_shri() does not accept it; the
 * destination is filled with zeroes instead.
 */
static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    int64_t count = -shift;

    if (count != (8 << vece)) {
        tcg_gen_gvec_shri(vece, dofs, aofs, count, oprsz, maxsz);
    } else {
        tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
    }
}
1642 DO_2SHIFT_VEC(VSHLI
, vshli_u
, false, tcg_gen_gvec_shli
)
1643 DO_2SHIFT(VQSHLI_S
, vqshli_s
, false)
1644 DO_2SHIFT(VQSHLI_U
, vqshli_u
, false)
1645 DO_2SHIFT(VQSHLUI
, vqshlui_s
, false)
1646 /* These right shifts use a left-shift helper with negated shift count */
1647 DO_2SHIFT_VEC(VSHRI_S
, vshli_s
, true, do_gvec_shri_s
)
1648 DO_2SHIFT_VEC(VSHRI_U
, vshli_u
, true, do_gvec_shri_u
)
1649 DO_2SHIFT(VRSHRI_S
, vrshli_s
, true)
1650 DO_2SHIFT(VRSHRI_U
, vrshli_u
, true)
1652 DO_2SHIFT_VEC(VSRI
, vsri
, false, gen_gvec_sri
)
1653 DO_2SHIFT_VEC(VSLI
, vsli
, false, gen_gvec_sli
)
1655 #define DO_2SHIFT_FP(INSN, FN) \
1656 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
1658 if (!dc_isar_feature(aa32_mve_fp, s)) { \
1661 return do_2shift(s, a, gen_helper_mve_##FN, false); \
1664 DO_2SHIFT_FP(VCVT_SH_fixed
, vcvt_sh
)
1665 DO_2SHIFT_FP(VCVT_UH_fixed
, vcvt_uh
)
1666 DO_2SHIFT_FP(VCVT_HS_fixed
, vcvt_hs
)
1667 DO_2SHIFT_FP(VCVT_HU_fixed
, vcvt_hu
)
1668 DO_2SHIFT_FP(VCVT_SF_fixed
, vcvt_sf
)
1669 DO_2SHIFT_FP(VCVT_UF_fixed
, vcvt_uf
)
1670 DO_2SHIFT_FP(VCVT_FS_fixed
, vcvt_fs
)
1671 DO_2SHIFT_FP(VCVT_FU_fixed
, vcvt_fu
)
1673 static bool do_2shift_scalar(DisasContext
*s
, arg_shl_scalar
*a
,
1674 MVEGenTwoOpShiftFn
*fn
)
1679 if (!dc_isar_feature(aa32_mve
, s
) ||
1680 !mve_check_qreg_bank(s
, a
->qda
) ||
1681 a
->rm
== 13 || a
->rm
== 15 || !fn
) {
1682 /* Rm cases are UNPREDICTABLE */
1685 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1689 qda
= mve_qreg_ptr(a
->qda
);
1690 rm
= load_reg(s
, a
->rm
);
1691 fn(cpu_env
, qda
, qda
, rm
);
1696 #define DO_2SHIFT_SCALAR(INSN, FN) \
1697 static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
1699 static MVEGenTwoOpShiftFn * const fns[] = { \
1700 gen_helper_mve_##FN##b, \
1701 gen_helper_mve_##FN##h, \
1702 gen_helper_mve_##FN##w, \
1705 return do_2shift_scalar(s, a, fns[a->size]); \
1708 DO_2SHIFT_SCALAR(VSHL_S_scalar
, vshli_s
)
1709 DO_2SHIFT_SCALAR(VSHL_U_scalar
, vshli_u
)
1710 DO_2SHIFT_SCALAR(VRSHL_S_scalar
, vrshli_s
)
1711 DO_2SHIFT_SCALAR(VRSHL_U_scalar
, vrshli_u
)
1712 DO_2SHIFT_SCALAR(VQSHL_S_scalar
, vqshli_s
)
1713 DO_2SHIFT_SCALAR(VQSHL_U_scalar
, vqshli_u
)
1714 DO_2SHIFT_SCALAR(VQRSHL_S_scalar
, vqrshli_s
)
1715 DO_2SHIFT_SCALAR(VQRSHL_U_scalar
, vqrshli_u
)
/*
 * Define trans_<INSN>() for a VSHLL variant. Only byte and halfword
 * helpers exist (table indexed by a->size), and the inline expander
 * is the do_gvec_<FN>() function of the same name.
 */
#define DO_VSHLL(INSN, FN)                                              \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const helpers[] = {                 \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
        };                                                              \
        return do_2shift_vec(s, a, helpers[a->size], false,             \
                             do_gvec_##FN);                             \
    }
1728 * For the VSHLL vector helpers, the vece is the size of the input
1729 * (ie MO_8 or MO_16); the helpers want to work in the output size.
1730 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
1732 static void do_gvec_vshllbs(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1733 int64_t shift
, uint32_t oprsz
, uint32_t maxsz
)
1735 unsigned ovece
= vece
+ 1;
1736 unsigned ibits
= vece
== MO_8
? 8 : 16;
1737 tcg_gen_gvec_shli(ovece
, dofs
, aofs
, ibits
, oprsz
, maxsz
);
1738 tcg_gen_gvec_sari(ovece
, dofs
, dofs
, ibits
- shift
, oprsz
, maxsz
);
1741 static void do_gvec_vshllbu(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1742 int64_t shift
, uint32_t oprsz
, uint32_t maxsz
)
1744 unsigned ovece
= vece
+ 1;
1745 tcg_gen_gvec_andi(ovece
, dofs
, aofs
,
1746 ovece
== MO_16
? 0xff : 0xffff, oprsz
, maxsz
);
1747 tcg_gen_gvec_shli(ovece
, dofs
, dofs
, shift
, oprsz
, maxsz
);
1750 static void do_gvec_vshllts(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1751 int64_t shift
, uint32_t oprsz
, uint32_t maxsz
)
1753 unsigned ovece
= vece
+ 1;
1754 unsigned ibits
= vece
== MO_8
? 8 : 16;
1756 tcg_gen_gvec_sari(ovece
, dofs
, aofs
, ibits
, oprsz
, maxsz
);
1758 tcg_gen_gvec_andi(ovece
, dofs
, aofs
,
1759 ovece
== MO_16
? 0xff00 : 0xffff0000, oprsz
, maxsz
);
1760 tcg_gen_gvec_sari(ovece
, dofs
, dofs
, ibits
- shift
, oprsz
, maxsz
);
1764 static void do_gvec_vshlltu(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1765 int64_t shift
, uint32_t oprsz
, uint32_t maxsz
)
1767 unsigned ovece
= vece
+ 1;
1768 unsigned ibits
= vece
== MO_8
? 8 : 16;
1770 tcg_gen_gvec_shri(ovece
, dofs
, aofs
, ibits
, oprsz
, maxsz
);
1772 tcg_gen_gvec_andi(ovece
, dofs
, aofs
,
1773 ovece
== MO_16
? 0xff00 : 0xffff0000, oprsz
, maxsz
);
1774 tcg_gen_gvec_shri(ovece
, dofs
, dofs
, ibits
- shift
, oprsz
, maxsz
);
1778 DO_VSHLL(VSHLL_BS
, vshllbs
)
1779 DO_VSHLL(VSHLL_BU
, vshllbu
)
1780 DO_VSHLL(VSHLL_TS
, vshllts
)
1781 DO_VSHLL(VSHLL_TU
, vshlltu
)
1783 #define DO_2SHIFT_N(INSN, FN) \
1784 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
1786 static MVEGenTwoOpShiftFn * const fns[] = { \
1787 gen_helper_mve_##FN##b, \
1788 gen_helper_mve_##FN##h, \
1790 return do_2shift(s, a, fns[a->size], false); \
1793 DO_2SHIFT_N(VSHRNB
, vshrnb
)
1794 DO_2SHIFT_N(VSHRNT
, vshrnt
)
1795 DO_2SHIFT_N(VRSHRNB
, vrshrnb
)
1796 DO_2SHIFT_N(VRSHRNT
, vrshrnt
)
1797 DO_2SHIFT_N(VQSHRNB_S
, vqshrnb_s
)
1798 DO_2SHIFT_N(VQSHRNT_S
, vqshrnt_s
)
1799 DO_2SHIFT_N(VQSHRNB_U
, vqshrnb_u
)
1800 DO_2SHIFT_N(VQSHRNT_U
, vqshrnt_u
)
1801 DO_2SHIFT_N(VQSHRUNB
, vqshrunb
)
1802 DO_2SHIFT_N(VQSHRUNT
, vqshrunt
)
1803 DO_2SHIFT_N(VQRSHRNB_S
, vqrshrnb_s
)
1804 DO_2SHIFT_N(VQRSHRNT_S
, vqrshrnt_s
)
1805 DO_2SHIFT_N(VQRSHRNB_U
, vqrshrnb_u
)
1806 DO_2SHIFT_N(VQRSHRNT_U
, vqrshrnt_u
)
1807 DO_2SHIFT_N(VQRSHRUNB
, vqrshrunb
)
1808 DO_2SHIFT_N(VQRSHRUNT
, vqrshrunt
)
1810 static bool trans_VSHLC(DisasContext
*s
, arg_VSHLC
*a
)
1813 * Whole Vector Left Shift with Carry. The carry is taken
1814 * from a general purpose register and written back there.
1815 * An imm of 0 means "shift by 32".
1820 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qd
)) {
1823 if (a
->rdm
== 13 || a
->rdm
== 15) {
1824 /* CONSTRAINED UNPREDICTABLE: we UNDEF */
1827 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1831 qd
= mve_qreg_ptr(a
->qd
);
1832 rdm
= load_reg(s
, a
->rdm
);
1833 gen_helper_mve_vshlc(rdm
, cpu_env
, qd
, rdm
, tcg_constant_i32(a
->imm
));
1834 store_reg(s
, a
->rdm
, rdm
);
1839 static bool do_vidup(DisasContext
*s
, arg_vidup
*a
, MVEGenVIDUPFn
*fn
)
1845 * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
1846 * This fills the vector with elements of successively increasing
1847 * or decreasing values, starting from Rn.
1849 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qd
)) {
1852 if (a
->size
== MO_64
) {
1853 /* size 0b11 is another encoding */
1856 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1860 qd
= mve_qreg_ptr(a
->qd
);
1861 rn
= load_reg(s
, a
->rn
);
1862 fn(rn
, cpu_env
, qd
, rn
, tcg_constant_i32(a
->imm
));
1863 store_reg(s
, a
->rn
, rn
);
1868 static bool do_viwdup(DisasContext
*s
, arg_viwdup
*a
, MVEGenVIWDUPFn
*fn
)
1874 * Vector increment/decrement with wrap and duplicate (VIWDUp, VDWDUP)
1875 * This fills the vector with elements of successively increasing
1876 * or decreasing values, starting from Rn. Rm specifies a point where
1877 * the count wraps back around to 0. The updated offset is written back
1880 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qd
)) {
1883 if (!fn
|| a
->rm
== 13 || a
->rm
== 15) {
1885 * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
1886 * Rm == 13 is VIWDUP, VDWDUP.
1890 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1894 qd
= mve_qreg_ptr(a
->qd
);
1895 rn
= load_reg(s
, a
->rn
);
1896 rm
= load_reg(s
, a
->rm
);
1897 fn(rn
, cpu_env
, qd
, rn
, rm
, tcg_constant_i32(a
->imm
));
1898 store_reg(s
, a
->rn
, rn
);
1903 static bool trans_VIDUP(DisasContext
*s
, arg_vidup
*a
)
1905 static MVEGenVIDUPFn
* const fns
[] = {
1906 gen_helper_mve_vidupb
,
1907 gen_helper_mve_viduph
,
1908 gen_helper_mve_vidupw
,
1911 return do_vidup(s
, a
, fns
[a
->size
]);
1914 static bool trans_VDDUP(DisasContext
*s
, arg_vidup
*a
)
1916 static MVEGenVIDUPFn
* const fns
[] = {
1917 gen_helper_mve_vidupb
,
1918 gen_helper_mve_viduph
,
1919 gen_helper_mve_vidupw
,
1922 /* VDDUP is just like VIDUP but with a negative immediate */
1924 return do_vidup(s
, a
, fns
[a
->size
]);
1927 static bool trans_VIWDUP(DisasContext
*s
, arg_viwdup
*a
)
1929 static MVEGenVIWDUPFn
* const fns
[] = {
1930 gen_helper_mve_viwdupb
,
1931 gen_helper_mve_viwduph
,
1932 gen_helper_mve_viwdupw
,
1935 return do_viwdup(s
, a
, fns
[a
->size
]);
1938 static bool trans_VDWDUP(DisasContext
*s
, arg_viwdup
*a
)
1940 static MVEGenVIWDUPFn
* const fns
[] = {
1941 gen_helper_mve_vdwdupb
,
1942 gen_helper_mve_vdwduph
,
1943 gen_helper_mve_vdwdupw
,
1946 return do_viwdup(s
, a
, fns
[a
->size
]);
1949 static bool do_vcmp(DisasContext
*s
, arg_vcmp
*a
, MVEGenCmpFn
*fn
)
1953 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qm
) ||
1957 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1961 qn
= mve_qreg_ptr(a
->qn
);
1962 qm
= mve_qreg_ptr(a
->qm
);
1963 fn(cpu_env
, qn
, qm
);
1966 gen_vpst(s
, a
->mask
);
1968 /* This insn updates predication bits */
1969 s
->base
.is_jmp
= DISAS_UPDATE_NOCHAIN
;
1974 static bool do_vcmp_scalar(DisasContext
*s
, arg_vcmp_scalar
*a
,
1975 MVEGenScalarCmpFn
*fn
)
1980 if (!dc_isar_feature(aa32_mve
, s
) || !fn
|| a
->rm
== 13) {
1983 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1987 qn
= mve_qreg_ptr(a
->qn
);
1989 /* Encoding Rm=0b1111 means "constant zero" */
1990 rm
= tcg_constant_i32(0);
1992 rm
= load_reg(s
, a
->rm
);
1994 fn(cpu_env
, qn
, rm
);
1997 gen_vpst(s
, a
->mask
);
1999 /* This insn updates predication bits */
2000 s
->base
.is_jmp
= DISAS_UPDATE_NOCHAIN
;
2005 #define DO_VCMP(INSN, FN) \
2006 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \
2008 static MVEGenCmpFn * const fns[] = { \
2009 gen_helper_mve_##FN##b, \
2010 gen_helper_mve_##FN##h, \
2011 gen_helper_mve_##FN##w, \
2014 return do_vcmp(s, a, fns[a->size]); \
2016 static bool trans_##INSN##_scalar(DisasContext *s, \
2017 arg_vcmp_scalar *a) \
2019 static MVEGenScalarCmpFn * const fns[] = { \
2020 gen_helper_mve_##FN##_scalarb, \
2021 gen_helper_mve_##FN##_scalarh, \
2022 gen_helper_mve_##FN##_scalarw, \
2025 return do_vcmp_scalar(s, a, fns[a->size]); \
2028 DO_VCMP(VCMPEQ
, vcmpeq
)
2029 DO_VCMP(VCMPNE
, vcmpne
)
2030 DO_VCMP(VCMPCS
, vcmpcs
)
2031 DO_VCMP(VCMPHI
, vcmphi
)
2032 DO_VCMP(VCMPGE
, vcmpge
)
2033 DO_VCMP(VCMPLT
, vcmplt
)
2034 DO_VCMP(VCMPGT
, vcmpgt
)
2035 DO_VCMP(VCMPLE
, vcmple
)
2037 #define DO_VCMP_FP(INSN, FN) \
2038 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \
2040 static MVEGenCmpFn * const fns[] = { \
2042 gen_helper_mve_##FN##h, \
2043 gen_helper_mve_##FN##s, \
2046 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2049 return do_vcmp(s, a, fns[a->size]); \
2051 static bool trans_##INSN##_scalar(DisasContext *s, \
2052 arg_vcmp_scalar *a) \
2054 static MVEGenScalarCmpFn * const fns[] = { \
2056 gen_helper_mve_##FN##_scalarh, \
2057 gen_helper_mve_##FN##_scalars, \
2060 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2063 return do_vcmp_scalar(s, a, fns[a->size]); \
2066 DO_VCMP_FP(VCMPEQ_fp
, vfcmpeq
)
2067 DO_VCMP_FP(VCMPNE_fp
, vfcmpne
)
2068 DO_VCMP_FP(VCMPGE_fp
, vfcmpge
)
2069 DO_VCMP_FP(VCMPLT_fp
, vfcmplt
)
2070 DO_VCMP_FP(VCMPGT_fp
, vfcmpgt
)
2071 DO_VCMP_FP(VCMPLE_fp
, vfcmple
)
2073 static bool do_vmaxv(DisasContext
*s
, arg_vmaxv
*a
, MVEGenVADDVFn fn
)
2076 * MIN/MAX operations across a vector: compute the min or
2077 * max of the initial value in a general purpose register
2078 * and all the elements in the vector, and store it back
2079 * into the general purpose register.
2084 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qm
) ||
2085 !fn
|| a
->rda
== 13 || a
->rda
== 15) {
2086 /* Rda cases are UNPREDICTABLE */
2089 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
2093 qm
= mve_qreg_ptr(a
->qm
);
2094 rda
= load_reg(s
, a
->rda
);
2095 fn(rda
, cpu_env
, qm
, rda
);
2096 store_reg(s
, a
->rda
, rda
);
2101 #define DO_VMAXV(INSN, FN) \
2102 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \
2104 static MVEGenVADDVFn * const fns[] = { \
2105 gen_helper_mve_##FN##b, \
2106 gen_helper_mve_##FN##h, \
2107 gen_helper_mve_##FN##w, \
2110 return do_vmaxv(s, a, fns[a->size]); \
2113 DO_VMAXV(VMAXV_S
, vmaxvs
)
2114 DO_VMAXV(VMAXV_U
, vmaxvu
)
2115 DO_VMAXV(VMAXAV
, vmaxav
)
2116 DO_VMAXV(VMINV_S
, vminvs
)
2117 DO_VMAXV(VMINV_U
, vminvu
)
2118 DO_VMAXV(VMINAV
, vminav
)
2120 #define DO_VMAXV_FP(INSN, FN) \
2121 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \
2123 static MVEGenVADDVFn * const fns[] = { \
2125 gen_helper_mve_##FN##h, \
2126 gen_helper_mve_##FN##s, \
2129 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2132 return do_vmaxv(s, a, fns[a->size]); \
2135 DO_VMAXV_FP(VMAXNMV
, vmaxnmv
)
2136 DO_VMAXV_FP(VMINNMV
, vminnmv
)
2137 DO_VMAXV_FP(VMAXNMAV
, vmaxnmav
)
2138 DO_VMAXV_FP(VMINNMAV
, vminnmav
)
2140 static bool do_vabav(DisasContext
*s
, arg_vabav
*a
, MVEGenVABAVFn
*fn
)
2142 /* Absolute difference accumulated across vector */
2146 if (!dc_isar_feature(aa32_mve
, s
) ||
2147 !mve_check_qreg_bank(s
, a
->qm
| a
->qn
) ||
2148 !fn
|| a
->rda
== 13 || a
->rda
== 15) {
2149 /* Rda cases are UNPREDICTABLE */
2152 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
2156 qm
= mve_qreg_ptr(a
->qm
);
2157 qn
= mve_qreg_ptr(a
->qn
);
2158 rda
= load_reg(s
, a
->rda
);
2159 fn(rda
, cpu_env
, qn
, qm
, rda
);
2160 store_reg(s
, a
->rda
, rda
);
2165 #define DO_VABAV(INSN, FN) \
2166 static bool trans_##INSN(DisasContext *s, arg_vabav *a) \
2168 static MVEGenVABAVFn * const fns[] = { \
2169 gen_helper_mve_##FN##b, \
2170 gen_helper_mve_##FN##h, \
2171 gen_helper_mve_##FN##w, \
2174 return do_vabav(s, a, fns[a->size]); \
2177 DO_VABAV(VABAV_S
, vabavs
)
2178 DO_VABAV(VABAV_U
, vabavu
)
2180 static bool trans_VMOV_to_2gp(DisasContext
*s
, arg_VMOV_to_2gp
*a
)
2183 * VMOV two 32-bit vector lanes to two general-purpose registers.
2184 * This insn is not predicated but it is subject to beat-wise
2185 * execution if it is not in an IT block. For us this means
2186 * only that if PSR.ECI says we should not be executing the beat
2187 * corresponding to the lane of the vector register being accessed
2188 * then we should skip perfoming the move, and that we need to do
2189 * the usual check for bad ECI state and advance of ECI state.
2190 * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2195 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qd
) ||
2196 a
->rt
== 13 || a
->rt
== 15 || a
->rt2
== 13 || a
->rt2
== 15 ||
2198 /* Rt/Rt2 cases are UNPREDICTABLE */
2201 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
2205 /* Convert Qreg index to Dreg for read_neon_element32() etc */
2208 if (!mve_skip_vmov(s
, vd
, a
->idx
, MO_32
)) {
2209 tmp
= tcg_temp_new_i32();
2210 read_neon_element32(tmp
, vd
, a
->idx
, MO_32
);
2211 store_reg(s
, a
->rt
, tmp
);
2213 if (!mve_skip_vmov(s
, vd
+ 1, a
->idx
, MO_32
)) {
2214 tmp
= tcg_temp_new_i32();
2215 read_neon_element32(tmp
, vd
+ 1, a
->idx
, MO_32
);
2216 store_reg(s
, a
->rt2
, tmp
);
2219 mve_update_and_store_eci(s
);
2223 static bool trans_VMOV_from_2gp(DisasContext
*s
, arg_VMOV_to_2gp
*a
)
2226 * VMOV two general-purpose registers to two 32-bit vector lanes.
2227 * This insn is not predicated but it is subject to beat-wise
2228 * execution if it is not in an IT block. For us this means
2229 * only that if PSR.ECI says we should not be executing the beat
2230 * corresponding to the lane of the vector register being accessed
2231 * then we should skip performing the move, and that we need to do
2232 * the usual check for bad ECI state and advance of ECI state.
2233 * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2238 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qd
) ||
2239 a
->rt
== 13 || a
->rt
== 15 || a
->rt2
== 13 || a
->rt2
== 15) {
2240 /* Rt/Rt2 cases are UNPREDICTABLE */
2243 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
2247 /* Convert Qreg idx to Dreg for read_neon_element32() etc */
2250 if (!mve_skip_vmov(s
, vd
, a
->idx
, MO_32
)) {
2251 tmp
= load_reg(s
, a
->rt
);
2252 write_neon_element32(tmp
, vd
, a
->idx
, MO_32
);
2254 if (!mve_skip_vmov(s
, vd
+ 1, a
->idx
, MO_32
)) {
2255 tmp
= load_reg(s
, a
->rt2
);
2256 write_neon_element32(tmp
, vd
+ 1, a
->idx
, MO_32
);
2259 mve_update_and_store_eci(s
);