2 * AArch64 SVE translation
4 * Copyright (c) 2018 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
22 #include "exec/exec-all.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35 #include "fpu/softfloat.h"
38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64
, uint32_t, uint32_t);
41 typedef void gen_helper_gvec_flags_3(TCGv_i32
, TCGv_ptr
, TCGv_ptr
,
43 typedef void gen_helper_gvec_flags_4(TCGv_i32
, TCGv_ptr
, TCGv_ptr
,
44 TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
46 typedef void gen_helper_gvec_mem(TCGv_env
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
47 typedef void gen_helper_gvec_mem_scatter(TCGv_env
, TCGv_ptr
, TCGv_ptr
,
48 TCGv_ptr
, TCGv_i64
, TCGv_i32
);
51 * Helpers for extracting complex instruction fields.
/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    /* The element size is encoded in the position of the top set bit
     * of tsz; -1 (via clz32(0) == 32) flags the all-zero encoding.
     */
    return 31 - clz32(x);
}

/* See e.g. LSR (immediate, predicated): shift count for a right shift,
 * decoded from the combined tsz:imm3 field.
 */
static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated): shift count for a left shift.  */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

/* Decode helper: many fields encode value-1; undo that bias.  */
static inline int plus1(int x)
{
    return x + 1;
}
/* The SH bit is in bit 8.  Extract the low 8 and shift.
 * Returns the signed 8-bit immediate, optionally scaled by 256.
 */
static inline int expand_imm_sh8s(int x)
{
    int imm = (int8_t)x;
    /* Use a multiply rather than "imm << 8": left-shifting a negative
     * value is undefined behavior in C; the multiply is well-defined
     * and produces the identical result.
     */
    return (x & 0x100) ? imm * 256 : imm;
}
/* As expand_imm_sh8s, but the low 8 bits are treated as unsigned.  */
static inline int expand_imm_sh8u(int x)
{
    unsigned imm = (uint8_t)x;
    if (x & 0x100) {
        imm <<= 8;
    }
    return imm;
}
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
100 * Include the generated decoder.
103 #include "decode-sve.inc.c"
106 * Implement all of the translator functions referenced by the decoder.
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    /* sve_len is the vector length in bytes; one predicate bit per byte.  */
    return s->sve_len >> 3;
}
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

/* Predicate register size rounded up for use with the gvec expanders.  */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}
/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
/* Set the cpu flags as per a return from an SVE helper.
 * The helper packs N into bit 31, Z into bit 1, C into bit 0.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest psuedofunction.  */

/* PredTest over a single 64-bit predicate word.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* PredTest over WORDS predicate words at env offsets DOFS (data) and
 * GOFS (governing predicate).
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
/* For each element size, the bits within a predicate word that are active.
 * One predicate bit per byte of the element: every bit (B), every 2nd (H),
 * every 4th (S), every 8th (D).
 */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
273 *** SVE Logical - Unpredicated Group
/* AND (vectors, unpredicated); element size is irrelevant for bitwise ops.  */
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

/* ORR (vectors, unpredicated); Zn == Zm is the preferred MOV alias.  */
static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

/* EOR (vectors, unpredicated).  */
static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

/* BIC (vectors, unpredicated): Zd = Zn & ~Zm.  */
static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}
301 *** SVE Integer Arithmetic - Unpredicated Group
/* ADD (vectors, unpredicated).  */
static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

/* SUB (vectors, unpredicated).  */
static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

/* SQADD: signed saturating add.  */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

/* SQSUB: signed saturating subtract.  */
static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

/* UQADD: unsigned saturating add.  */
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

/* UQSUB: unsigned saturating subtract.  */
static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
335 *** SVE Integer Arithmetic - Binary Predicated Group
/* Invoke an out-of-line helper on Zd = f(Zn, Zm) predicated by Pg.
 * A NULL fn indicates an unallocated esz encoding.
 */
static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand one predicated binary operation for all four element sizes.  */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)
/* SDIV (predicated): only 32- and 64-bit element sizes are allocated.  */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

/* UDIV (predicated): only 32- and 64-bit element sizes are allocated.  */
static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
409 *** SVE Integer Arithmetic - Unary Predicated Group
/* Invoke an out-of-line helper on Zd = f(Zn) predicated by Pg.
 * A NULL fn indicates an unallocated esz encoding.
 */
static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand one predicated unary operation for all four element sizes.  */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(NOT_zpz, not_zpz)
/* FABS: no 8-bit element size, hence the NULL first entry.  */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* FNEG: no 8-bit element size.  */
static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTB: sign-extend bytes; destination must be wider than a byte.  */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* UXTB: zero-extend bytes.  */
static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTH: sign-extend halfwords; only S and D destinations are valid.  */
static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* UXTH: zero-extend halfwords.  */
static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTW: sign-extend words; only a D destination is valid.  */
static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

/* UXTW: zero-extend words; only a D destination is valid.  */
static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}
522 *** SVE Integer Reduction Group
525 typedef void gen_helper_gvec_reduc(TCGv_i64
, TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
/* Reduce a vector (Zn, predicated by Pg) to a scalar, written to Vd.
 * A NULL fn indicates an unallocated esz encoding.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

/* Expand one reduction for all four element sizes.  */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

/* SADDV: no 64-bit element (it would be identical to UADDV).  */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}
590 *** SVE Shift by Immediate - Predicated Group
/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements.  */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
/* Invoke an out-of-line helper on Zd = f(Zn, imm) predicated by Pg,
 * passing the immediate via the simd_data field of the descriptor.
 */
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}
/* ASR (immediate, predicated).  */
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

/* LSR (immediate, predicated).  */
static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/* LSL (immediate, predicated).  */
static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/* ASRD: arithmetic shift right for divide.  */
static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
710 *** SVE Bitwise Shift - Predicated Group
/* Expand a predicated shift-by-wide-elements operation; the shift amount
 * is taken from 64-bit elements of Zm, so only B/H/S element sizes exist.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
734 *** SVE Bitwise Shift - Unpredicated Group
/* Unpredicated shift by immediate, expanded inline via a gvec function.
 * ASR saturates the count to esize-1; LSR/LSL by >= esize zero the result.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
/* ASR (immediate, unpredicated).  */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

/* LSR (immediate, unpredicated).  */
static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

/* LSL (immediate, unpredicated).  */
static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
/* Invoke an out-of-line helper for an unpredicated shift-by-wide-elements.
 * A NULL fn indicates an unallocated esz encoding.
 */
static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand an unpredicated shift-by-wide-elements; no D element size.  */
#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}
812 *** SVE Integer Multiply-Add Group
/* Invoke an out-of-line helper on Zd = f(Za, Zn, Zm) predicated by Pg
 * (multiply-add and friends).
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand one predicated multiply-add for all four element sizes.  */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}
846 *** SVE Index Generation Group
/* Generate Zd = start + n * incr for each element n.  64-bit elements
 * take the operands directly; narrower elements truncate them to 32 bits
 * first, since the helpers take i32 arguments.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
/* INDEX (immediate start, immediate increment).  */
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (immediate start, register increment).  */
static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        /* Only the constant needs freeing; incr aliases the cpu reg.  */
        tcg_temp_free_i64(start);
    }
    return true;
}

/* INDEX (register start, immediate increment).  */
static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (register start, register increment).  */
static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
925 *** SVE Stack Allocation Group
/* ADDVL: Xd = Xn + imm * VL (vector length in bytes).  */
static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

/* ADDPL: Xd = Xn + imm * PL (predicate length in bytes = VL / 8).  */
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

/* RDVL: Xd = imm * VL.  */
static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}
952 *** SVE Compute Vector Address Group
/* ADR: Zd = Zn + (Zm << msz), with the shift passed as simd_data.  */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

/* ADR, packed 32-bit offsets in a 32-bit vector.  */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

/* ADR, packed 64-bit offsets.  */
static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

/* ADR, sign-extended 32-bit offsets in a 64-bit vector.  */
static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

/* ADR, zero-extended 32-bit offsets in a 64-bit vector.  */
static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
988 *** SVE Integer Misc - Unpredicated Group
/* FEXPA: floating-point exponential accelerator; no 8-bit elements.  */
static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/* FTSSEL: floating-point trig select coefficient; no 8-bit elements.  */
static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
1033 *** SVE Predicate Logical Operations Group
/* Expand a flag-setting predicate logical operation: perform the operation
 * and then run PredTest on the result against the governing predicate.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
/* Pd = Pn & Pm & Pg, 64-bit scalar path.  */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = Pn & Pm & Pg, host-vector path.  */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* AND (predicates); special-case the non-flag-setting aliases.  */
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/* Pd = (Pn & ~Pm) & Pg, 64-bit scalar path.  */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn & ~Pm) & Pg, host-vector path.  */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* BIC (predicates); when Pg == Pn the governing AND is redundant.  */
static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/* Pd = (Pn ^ Pm) & Pg, 64-bit scalar path.  */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn ^ Pm) & Pg, host-vector path.  */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* EOR (predicates).  */
static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/* Pd = (Pn & Pg) | (Pm & ~Pg), 64-bit scalar path.
 * NOTE: clobbers pn and pm as scratch.
 */
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

/* Pd = (Pn & Pg) | (Pm & ~Pg), host-vector path; clobbers pn and pm.  */
static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

/* SEL (predicates); the flag-setting form is unallocated.  */
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/* Pd = (Pn | Pm) & Pg, 64-bit scalar path.  */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn | Pm) & Pg, host-vector path.  */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* ORR (predicates); Pg == Pn == Pm is the preferred MOV alias.  */
static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/* Pd = (Pn | ~Pm) & Pg, 64-bit scalar path.  */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn | ~Pm) & Pg, host-vector path.  */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* ORN (predicates).  */
static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/* Pd = ~(Pn | Pm) & Pg, 64-bit scalar path.  */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Pd = ~(Pn | Pm) & Pg, host-vector path.  */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

/* NOR (predicates).  */
static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1297 static void gen_nand_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1299 tcg_gen_and_i64(pd
, pn
, pm
);
1300 tcg_gen_andc_i64(pd
, pg
, pd
);
1303 static void gen_nand_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1304 TCGv_vec pm
, TCGv_vec pg
)
1306 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
1307 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
1310 static bool trans_NAND_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1312 static const GVecGen4 op
= {
1313 .fni8
= gen_nand_pg_i64
,
1314 .fniv
= gen_nand_pg_vec
,
1315 .fno
= gen_helper_sve_nand_pppp
,
1316 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1319 return do_pppp_flags(s
, a
, &op
);
1321 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1326 *** SVE Predicate Misc Group
1329 static bool trans_PTEST(DisasContext
*s
, arg_PTEST
*a
, uint32_t insn
)
1331 if (sve_access_check(s
)) {
1332 int nofs
= pred_full_reg_offset(s
, a
->rn
);
1333 int gofs
= pred_full_reg_offset(s
, a
->pg
);
1334 int words
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1337 TCGv_i64 pn
= tcg_temp_new_i64();
1338 TCGv_i64 pg
= tcg_temp_new_i64();
1340 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
1341 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
1342 do_predtest1(pn
, pg
);
1344 tcg_temp_free_i64(pn
);
1345 tcg_temp_free_i64(pg
);
1347 do_predtest(s
, nofs
, gofs
, words
);
/* See the ARM pseudocode DecodePredCount.  Returns the number of
 * active elements for a given predicate-count PATTERN, or 0 when the
 * pattern's bound exceeds the available element count.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
1391 /* This handles all of the predicate initialization instructions,
1392 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1393 * so that decode_pred_count returns 0. For SETFFR, we will have
1394 * set RD == 16 == FFR.
1396 static bool do_predset(DisasContext
*s
, int esz
, int rd
, int pat
, bool setflag
)
1398 if (!sve_access_check(s
)) {
1402 unsigned fullsz
= vec_full_reg_size(s
);
1403 unsigned ofs
= pred_full_reg_offset(s
, rd
);
1404 unsigned numelem
, setsz
, i
;
1405 uint64_t word
, lastword
;
1408 numelem
= decode_pred_count(fullsz
, pat
, esz
);
1410 /* Determine what we must store into each bit, and how many. */
1412 lastword
= word
= 0;
1415 setsz
= numelem
<< esz
;
1416 lastword
= word
= pred_esz_masks
[esz
];
1418 lastword
&= ~(-1ull << (setsz
% 64));
1422 t
= tcg_temp_new_i64();
1424 tcg_gen_movi_i64(t
, lastword
);
1425 tcg_gen_st_i64(t
, cpu_env
, ofs
);
1429 if (word
== lastword
) {
1430 unsigned maxsz
= size_for_gvec(fullsz
/ 8);
1431 unsigned oprsz
= size_for_gvec(setsz
/ 8);
1433 if (oprsz
* 8 == setsz
) {
1434 tcg_gen_gvec_dup64i(ofs
, oprsz
, maxsz
, word
);
1437 if (oprsz
* 8 == setsz
+ 8) {
1438 tcg_gen_gvec_dup64i(ofs
, oprsz
, maxsz
, word
);
1439 tcg_gen_movi_i64(t
, 0);
1440 tcg_gen_st_i64(t
, cpu_env
, ofs
+ oprsz
- 8);
1448 tcg_gen_movi_i64(t
, word
);
1449 for (i
= 0; i
< setsz
; i
+= 8) {
1450 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1452 if (lastword
!= word
) {
1453 tcg_gen_movi_i64(t
, lastword
);
1454 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1458 tcg_gen_movi_i64(t
, 0);
1459 for (; i
< fullsz
; i
+= 8) {
1460 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1465 tcg_temp_free_i64(t
);
1469 tcg_gen_movi_i32(cpu_NF
, -(word
!= 0));
1470 tcg_gen_movi_i32(cpu_CF
, word
== 0);
1471 tcg_gen_movi_i32(cpu_VF
, 0);
1472 tcg_gen_mov_i32(cpu_ZF
, cpu_NF
);
1477 static bool trans_PTRUE(DisasContext
*s
, arg_PTRUE
*a
, uint32_t insn
)
1479 return do_predset(s
, a
->esz
, a
->rd
, a
->pat
, a
->s
);
1482 static bool trans_SETFFR(DisasContext
*s
, arg_SETFFR
*a
, uint32_t insn
)
1484 /* Note pat == 31 is #all, to set all elements. */
1485 return do_predset(s
, 0, FFR_PRED_NUM
, 31, false);
1488 static bool trans_PFALSE(DisasContext
*s
, arg_PFALSE
*a
, uint32_t insn
)
1490 /* Note pat == 32 is #unimp, to set no elements. */
1491 return do_predset(s
, 0, a
->rd
, 32, false);
1494 static bool trans_RDFFR_p(DisasContext
*s
, arg_RDFFR_p
*a
, uint32_t insn
)
1496 /* The path through do_pppp_flags is complicated enough to want to avoid
1497 * duplication. Frob the arguments into the form of a predicated AND.
1499 arg_rprr_s alt_a
= {
1500 .rd
= a
->rd
, .pg
= a
->pg
, .s
= a
->s
,
1501 .rn
= FFR_PRED_NUM
, .rm
= FFR_PRED_NUM
,
1503 return trans_AND_pppp(s
, &alt_a
, insn
);
1506 static bool trans_RDFFR(DisasContext
*s
, arg_RDFFR
*a
, uint32_t insn
)
1508 return do_mov_p(s
, a
->rd
, FFR_PRED_NUM
);
1511 static bool trans_WRFFR(DisasContext
*s
, arg_WRFFR
*a
, uint32_t insn
)
1513 return do_mov_p(s
, FFR_PRED_NUM
, a
->rn
);
1516 static bool do_pfirst_pnext(DisasContext
*s
, arg_rr_esz
*a
,
1517 void (*gen_fn
)(TCGv_i32
, TCGv_ptr
,
1518 TCGv_ptr
, TCGv_i32
))
1520 if (!sve_access_check(s
)) {
1524 TCGv_ptr t_pd
= tcg_temp_new_ptr();
1525 TCGv_ptr t_pg
= tcg_temp_new_ptr();
1529 desc
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1530 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
1532 tcg_gen_addi_ptr(t_pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
1533 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
1534 t
= tcg_const_i32(desc
);
1536 gen_fn(t
, t_pd
, t_pg
, t
);
1537 tcg_temp_free_ptr(t_pd
);
1538 tcg_temp_free_ptr(t_pg
);
1541 tcg_temp_free_i32(t
);
1545 static bool trans_PFIRST(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
1547 return do_pfirst_pnext(s
, a
, gen_helper_sve_pfirst
);
1550 static bool trans_PNEXT(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
1552 return do_pfirst_pnext(s
, a
, gen_helper_sve_pnext
);
1556 *** SVE Element Count Group
1559 /* Perform an inline saturating addition of a 32-bit value within
1560 * a 64-bit register. The second operand is known to be positive,
1561 * which halves the comparisions we must perform to bound the result.
1563 static void do_sat_addsub_32(TCGv_i64 reg
, TCGv_i64 val
, bool u
, bool d
)
1569 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1571 tcg_gen_ext32u_i64(reg
, reg
);
1573 tcg_gen_ext32s_i64(reg
, reg
);
1576 tcg_gen_sub_i64(reg
, reg
, val
);
1577 ibound
= (u
? 0 : INT32_MIN
);
1580 tcg_gen_add_i64(reg
, reg
, val
);
1581 ibound
= (u
? UINT32_MAX
: INT32_MAX
);
1584 bound
= tcg_const_i64(ibound
);
1585 tcg_gen_movcond_i64(cond
, reg
, reg
, bound
, bound
, reg
);
1586 tcg_temp_free_i64(bound
);
1589 /* Similarly with 64-bit values. */
1590 static void do_sat_addsub_64(TCGv_i64 reg
, TCGv_i64 val
, bool u
, bool d
)
1592 TCGv_i64 t0
= tcg_temp_new_i64();
1593 TCGv_i64 t1
= tcg_temp_new_i64();
1598 tcg_gen_sub_i64(t0
, reg
, val
);
1599 tcg_gen_movi_i64(t1
, 0);
1600 tcg_gen_movcond_i64(TCG_COND_LTU
, reg
, reg
, val
, t1
, t0
);
1602 tcg_gen_add_i64(t0
, reg
, val
);
1603 tcg_gen_movi_i64(t1
, -1);
1604 tcg_gen_movcond_i64(TCG_COND_LTU
, reg
, t0
, reg
, t1
, t0
);
1608 /* Detect signed overflow for subtraction. */
1609 tcg_gen_xor_i64(t0
, reg
, val
);
1610 tcg_gen_sub_i64(t1
, reg
, val
);
1611 tcg_gen_xor_i64(reg
, reg
, t0
);
1612 tcg_gen_and_i64(t0
, t0
, reg
);
1614 /* Bound the result. */
1615 tcg_gen_movi_i64(reg
, INT64_MIN
);
1616 t2
= tcg_const_i64(0);
1617 tcg_gen_movcond_i64(TCG_COND_LT
, reg
, t0
, t2
, reg
, t1
);
1619 /* Detect signed overflow for addition. */
1620 tcg_gen_xor_i64(t0
, reg
, val
);
1621 tcg_gen_add_i64(reg
, reg
, val
);
1622 tcg_gen_xor_i64(t1
, reg
, val
);
1623 tcg_gen_andc_i64(t0
, t1
, t0
);
1625 /* Bound the result. */
1626 tcg_gen_movi_i64(t1
, INT64_MAX
);
1627 t2
= tcg_const_i64(0);
1628 tcg_gen_movcond_i64(TCG_COND_LT
, reg
, t0
, t2
, t1
, reg
);
1630 tcg_temp_free_i64(t2
);
1632 tcg_temp_free_i64(t0
);
1633 tcg_temp_free_i64(t1
);
1636 /* Similarly with a vector and a scalar operand. */
1637 static void do_sat_addsub_vec(DisasContext
*s
, int esz
, int rd
, int rn
,
1638 TCGv_i64 val
, bool u
, bool d
)
1640 unsigned vsz
= vec_full_reg_size(s
);
1641 TCGv_ptr dptr
, nptr
;
1645 dptr
= tcg_temp_new_ptr();
1646 nptr
= tcg_temp_new_ptr();
1647 tcg_gen_addi_ptr(dptr
, cpu_env
, vec_full_reg_offset(s
, rd
));
1648 tcg_gen_addi_ptr(nptr
, cpu_env
, vec_full_reg_offset(s
, rn
));
1649 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
1653 t32
= tcg_temp_new_i32();
1654 tcg_gen_extrl_i64_i32(t32
, val
);
1656 tcg_gen_neg_i32(t32
, t32
);
1659 gen_helper_sve_uqaddi_b(dptr
, nptr
, t32
, desc
);
1661 gen_helper_sve_sqaddi_b(dptr
, nptr
, t32
, desc
);
1663 tcg_temp_free_i32(t32
);
1667 t32
= tcg_temp_new_i32();
1668 tcg_gen_extrl_i64_i32(t32
, val
);
1670 tcg_gen_neg_i32(t32
, t32
);
1673 gen_helper_sve_uqaddi_h(dptr
, nptr
, t32
, desc
);
1675 gen_helper_sve_sqaddi_h(dptr
, nptr
, t32
, desc
);
1677 tcg_temp_free_i32(t32
);
1681 t64
= tcg_temp_new_i64();
1683 tcg_gen_neg_i64(t64
, val
);
1685 tcg_gen_mov_i64(t64
, val
);
1688 gen_helper_sve_uqaddi_s(dptr
, nptr
, t64
, desc
);
1690 gen_helper_sve_sqaddi_s(dptr
, nptr
, t64
, desc
);
1692 tcg_temp_free_i64(t64
);
1698 gen_helper_sve_uqsubi_d(dptr
, nptr
, val
, desc
);
1700 gen_helper_sve_uqaddi_d(dptr
, nptr
, val
, desc
);
1703 t64
= tcg_temp_new_i64();
1704 tcg_gen_neg_i64(t64
, val
);
1705 gen_helper_sve_sqaddi_d(dptr
, nptr
, t64
, desc
);
1706 tcg_temp_free_i64(t64
);
1708 gen_helper_sve_sqaddi_d(dptr
, nptr
, val
, desc
);
1713 g_assert_not_reached();
1716 tcg_temp_free_ptr(dptr
);
1717 tcg_temp_free_ptr(nptr
);
1718 tcg_temp_free_i32(desc
);
1721 static bool trans_CNT_r(DisasContext
*s
, arg_CNT_r
*a
, uint32_t insn
)
1723 if (sve_access_check(s
)) {
1724 unsigned fullsz
= vec_full_reg_size(s
);
1725 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1726 tcg_gen_movi_i64(cpu_reg(s
, a
->rd
), numelem
* a
->imm
);
1731 static bool trans_INCDEC_r(DisasContext
*s
, arg_incdec_cnt
*a
, uint32_t insn
)
1733 if (sve_access_check(s
)) {
1734 unsigned fullsz
= vec_full_reg_size(s
);
1735 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1736 int inc
= numelem
* a
->imm
* (a
->d
? -1 : 1);
1737 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1739 tcg_gen_addi_i64(reg
, reg
, inc
);
1744 static bool trans_SINCDEC_r_32(DisasContext
*s
, arg_incdec_cnt
*a
,
1747 if (!sve_access_check(s
)) {
1751 unsigned fullsz
= vec_full_reg_size(s
);
1752 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1753 int inc
= numelem
* a
->imm
;
1754 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1756 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1759 tcg_gen_ext32u_i64(reg
, reg
);
1761 tcg_gen_ext32s_i64(reg
, reg
);
1764 TCGv_i64 t
= tcg_const_i64(inc
);
1765 do_sat_addsub_32(reg
, t
, a
->u
, a
->d
);
1766 tcg_temp_free_i64(t
);
1771 static bool trans_SINCDEC_r_64(DisasContext
*s
, arg_incdec_cnt
*a
,
1774 if (!sve_access_check(s
)) {
1778 unsigned fullsz
= vec_full_reg_size(s
);
1779 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1780 int inc
= numelem
* a
->imm
;
1781 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
1784 TCGv_i64 t
= tcg_const_i64(inc
);
1785 do_sat_addsub_64(reg
, t
, a
->u
, a
->d
);
1786 tcg_temp_free_i64(t
);
1791 static bool trans_INCDEC_v(DisasContext
*s
, arg_incdec2_cnt
*a
, uint32_t insn
)
1797 unsigned fullsz
= vec_full_reg_size(s
);
1798 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1799 int inc
= numelem
* a
->imm
;
1802 if (sve_access_check(s
)) {
1803 TCGv_i64 t
= tcg_const_i64(a
->d
? -inc
: inc
);
1804 tcg_gen_gvec_adds(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
1805 vec_full_reg_offset(s
, a
->rn
),
1807 tcg_temp_free_i64(t
);
1810 do_mov_z(s
, a
->rd
, a
->rn
);
1815 static bool trans_SINCDEC_v(DisasContext
*s
, arg_incdec2_cnt
*a
,
1822 unsigned fullsz
= vec_full_reg_size(s
);
1823 unsigned numelem
= decode_pred_count(fullsz
, a
->pat
, a
->esz
);
1824 int inc
= numelem
* a
->imm
;
1827 if (sve_access_check(s
)) {
1828 TCGv_i64 t
= tcg_const_i64(inc
);
1829 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, t
, a
->u
, a
->d
);
1830 tcg_temp_free_i64(t
);
1833 do_mov_z(s
, a
->rd
, a
->rn
);
1839 *** SVE Bitwise Immediate Group
1842 static bool do_zz_dbm(DisasContext
*s
, arg_rr_dbm
*a
, GVecGen2iFn
*gvec_fn
)
1845 if (!logic_imm_decode_wmask(&imm
, extract32(a
->dbm
, 12, 1),
1846 extract32(a
->dbm
, 0, 6),
1847 extract32(a
->dbm
, 6, 6))) {
1850 if (sve_access_check(s
)) {
1851 unsigned vsz
= vec_full_reg_size(s
);
1852 gvec_fn(MO_64
, vec_full_reg_offset(s
, a
->rd
),
1853 vec_full_reg_offset(s
, a
->rn
), imm
, vsz
, vsz
);
1858 static bool trans_AND_zzi(DisasContext
*s
, arg_rr_dbm
*a
, uint32_t insn
)
1860 return do_zz_dbm(s
, a
, tcg_gen_gvec_andi
);
1863 static bool trans_ORR_zzi(DisasContext
*s
, arg_rr_dbm
*a
, uint32_t insn
)
1865 return do_zz_dbm(s
, a
, tcg_gen_gvec_ori
);
1868 static bool trans_EOR_zzi(DisasContext
*s
, arg_rr_dbm
*a
, uint32_t insn
)
1870 return do_zz_dbm(s
, a
, tcg_gen_gvec_xori
);
1873 static bool trans_DUPM(DisasContext
*s
, arg_DUPM
*a
, uint32_t insn
)
1876 if (!logic_imm_decode_wmask(&imm
, extract32(a
->dbm
, 12, 1),
1877 extract32(a
->dbm
, 0, 6),
1878 extract32(a
->dbm
, 6, 6))) {
1881 if (sve_access_check(s
)) {
1882 do_dupi_z(s
, a
->rd
, imm
);
1888 *** SVE Integer Wide Immediate - Predicated Group
1891 /* Implement all merging copies. This is used for CPY (immediate),
1892 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1894 static void do_cpy_m(DisasContext
*s
, int esz
, int rd
, int rn
, int pg
,
1897 typedef void gen_cpy(TCGv_ptr
, TCGv_ptr
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
1898 static gen_cpy
* const fns
[4] = {
1899 gen_helper_sve_cpy_m_b
, gen_helper_sve_cpy_m_h
,
1900 gen_helper_sve_cpy_m_s
, gen_helper_sve_cpy_m_d
,
1902 unsigned vsz
= vec_full_reg_size(s
);
1903 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
1904 TCGv_ptr t_zd
= tcg_temp_new_ptr();
1905 TCGv_ptr t_zn
= tcg_temp_new_ptr();
1906 TCGv_ptr t_pg
= tcg_temp_new_ptr();
1908 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, rd
));
1909 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, rn
));
1910 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
1912 fns
[esz
](t_zd
, t_zn
, t_pg
, val
, desc
);
1914 tcg_temp_free_ptr(t_zd
);
1915 tcg_temp_free_ptr(t_zn
);
1916 tcg_temp_free_ptr(t_pg
);
1917 tcg_temp_free_i32(desc
);
1920 static bool trans_FCPY(DisasContext
*s
, arg_FCPY
*a
, uint32_t insn
)
1925 if (sve_access_check(s
)) {
1926 /* Decode the VFP immediate. */
1927 uint64_t imm
= vfp_expand_imm(a
->esz
, a
->imm
);
1928 TCGv_i64 t_imm
= tcg_const_i64(imm
);
1929 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rn
, a
->pg
, t_imm
);
1930 tcg_temp_free_i64(t_imm
);
1935 static bool trans_CPY_m_i(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
1937 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
1940 if (sve_access_check(s
)) {
1941 TCGv_i64 t_imm
= tcg_const_i64(a
->imm
);
1942 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rn
, a
->pg
, t_imm
);
1943 tcg_temp_free_i64(t_imm
);
1948 static bool trans_CPY_z_i(DisasContext
*s
, arg_CPY_z_i
*a
, uint32_t insn
)
1950 static gen_helper_gvec_2i
* const fns
[4] = {
1951 gen_helper_sve_cpy_z_b
, gen_helper_sve_cpy_z_h
,
1952 gen_helper_sve_cpy_z_s
, gen_helper_sve_cpy_z_d
,
1955 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
1958 if (sve_access_check(s
)) {
1959 unsigned vsz
= vec_full_reg_size(s
);
1960 TCGv_i64 t_imm
= tcg_const_i64(a
->imm
);
1961 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s
, a
->rd
),
1962 pred_full_reg_offset(s
, a
->pg
),
1963 t_imm
, vsz
, vsz
, 0, fns
[a
->esz
]);
1964 tcg_temp_free_i64(t_imm
);
1970 *** SVE Permute Extract Group
1973 static bool trans_EXT(DisasContext
*s
, arg_EXT
*a
, uint32_t insn
)
1975 if (!sve_access_check(s
)) {
1979 unsigned vsz
= vec_full_reg_size(s
);
1980 unsigned n_ofs
= a
->imm
>= vsz
? 0 : a
->imm
;
1981 unsigned n_siz
= vsz
- n_ofs
;
1982 unsigned d
= vec_full_reg_offset(s
, a
->rd
);
1983 unsigned n
= vec_full_reg_offset(s
, a
->rn
);
1984 unsigned m
= vec_full_reg_offset(s
, a
->rm
);
1986 /* Use host vector move insns if we have appropriate sizes
1987 * and no unfortunate overlap.
1990 && n_ofs
== size_for_gvec(n_ofs
)
1991 && n_siz
== size_for_gvec(n_siz
)
1992 && (d
!= n
|| n_siz
<= n_ofs
)) {
1993 tcg_gen_gvec_mov(0, d
, n
+ n_ofs
, n_siz
, n_siz
);
1995 tcg_gen_gvec_mov(0, d
+ n_siz
, m
, n_ofs
, n_ofs
);
1998 tcg_gen_gvec_3_ool(d
, n
, m
, vsz
, vsz
, n_ofs
, gen_helper_sve_ext
);
2004 *** SVE Permute - Unpredicated Group
2007 static bool trans_DUP_s(DisasContext
*s
, arg_DUP_s
*a
, uint32_t insn
)
2009 if (sve_access_check(s
)) {
2010 unsigned vsz
= vec_full_reg_size(s
);
2011 tcg_gen_gvec_dup_i64(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
2012 vsz
, vsz
, cpu_reg_sp(s
, a
->rn
));
2017 static bool trans_DUP_x(DisasContext
*s
, arg_DUP_x
*a
, uint32_t insn
)
2019 if ((a
->imm
& 0x1f) == 0) {
2022 if (sve_access_check(s
)) {
2023 unsigned vsz
= vec_full_reg_size(s
);
2024 unsigned dofs
= vec_full_reg_offset(s
, a
->rd
);
2025 unsigned esz
, index
;
2027 esz
= ctz32(a
->imm
);
2028 index
= a
->imm
>> (esz
+ 1);
2030 if ((index
<< esz
) < vsz
) {
2031 unsigned nofs
= vec_reg_offset(s
, a
->rn
, index
, esz
);
2032 tcg_gen_gvec_dup_mem(esz
, dofs
, nofs
, vsz
, vsz
);
2034 tcg_gen_gvec_dup64i(dofs
, vsz
, vsz
, 0);
2040 static void do_insr_i64(DisasContext
*s
, arg_rrr_esz
*a
, TCGv_i64 val
)
2042 typedef void gen_insr(TCGv_ptr
, TCGv_ptr
, TCGv_i64
, TCGv_i32
);
2043 static gen_insr
* const fns
[4] = {
2044 gen_helper_sve_insr_b
, gen_helper_sve_insr_h
,
2045 gen_helper_sve_insr_s
, gen_helper_sve_insr_d
,
2047 unsigned vsz
= vec_full_reg_size(s
);
2048 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
2049 TCGv_ptr t_zd
= tcg_temp_new_ptr();
2050 TCGv_ptr t_zn
= tcg_temp_new_ptr();
2052 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, a
->rd
));
2053 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2055 fns
[a
->esz
](t_zd
, t_zn
, val
, desc
);
2057 tcg_temp_free_ptr(t_zd
);
2058 tcg_temp_free_ptr(t_zn
);
2059 tcg_temp_free_i32(desc
);
2062 static bool trans_INSR_f(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2064 if (sve_access_check(s
)) {
2065 TCGv_i64 t
= tcg_temp_new_i64();
2066 tcg_gen_ld_i64(t
, cpu_env
, vec_reg_offset(s
, a
->rm
, 0, MO_64
));
2067 do_insr_i64(s
, a
, t
);
2068 tcg_temp_free_i64(t
);
2073 static bool trans_INSR_r(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2075 if (sve_access_check(s
)) {
2076 do_insr_i64(s
, a
, cpu_reg(s
, a
->rm
));
2081 static bool trans_REV_v(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
2083 static gen_helper_gvec_2
* const fns
[4] = {
2084 gen_helper_sve_rev_b
, gen_helper_sve_rev_h
,
2085 gen_helper_sve_rev_s
, gen_helper_sve_rev_d
2088 if (sve_access_check(s
)) {
2089 unsigned vsz
= vec_full_reg_size(s
);
2090 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, a
->rd
),
2091 vec_full_reg_offset(s
, a
->rn
),
2092 vsz
, vsz
, 0, fns
[a
->esz
]);
2097 static bool trans_TBL(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2099 static gen_helper_gvec_3
* const fns
[4] = {
2100 gen_helper_sve_tbl_b
, gen_helper_sve_tbl_h
,
2101 gen_helper_sve_tbl_s
, gen_helper_sve_tbl_d
2104 if (sve_access_check(s
)) {
2105 unsigned vsz
= vec_full_reg_size(s
);
2106 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
2107 vec_full_reg_offset(s
, a
->rn
),
2108 vec_full_reg_offset(s
, a
->rm
),
2109 vsz
, vsz
, 0, fns
[a
->esz
]);
2114 static bool trans_UNPK(DisasContext
*s
, arg_UNPK
*a
, uint32_t insn
)
2116 static gen_helper_gvec_2
* const fns
[4][2] = {
2118 { gen_helper_sve_sunpk_h
, gen_helper_sve_uunpk_h
},
2119 { gen_helper_sve_sunpk_s
, gen_helper_sve_uunpk_s
},
2120 { gen_helper_sve_sunpk_d
, gen_helper_sve_uunpk_d
},
2126 if (sve_access_check(s
)) {
2127 unsigned vsz
= vec_full_reg_size(s
);
2128 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, a
->rd
),
2129 vec_full_reg_offset(s
, a
->rn
)
2130 + (a
->h
? vsz
/ 2 : 0),
2131 vsz
, vsz
, 0, fns
[a
->esz
][a
->u
]);
2137 *** SVE Permute - Predicates Group
2140 static bool do_perm_pred3(DisasContext
*s
, arg_rrr_esz
*a
, bool high_odd
,
2141 gen_helper_gvec_3
*fn
)
2143 if (!sve_access_check(s
)) {
2147 unsigned vsz
= pred_full_reg_size(s
);
2149 /* Predicate sizes may be smaller and cannot use simd_desc.
2150 We cannot round up, as we do elsewhere, because we need
2151 the exact size for ZIP2 and REV. We retain the style for
2152 the other helpers for consistency. */
2153 TCGv_ptr t_d
= tcg_temp_new_ptr();
2154 TCGv_ptr t_n
= tcg_temp_new_ptr();
2155 TCGv_ptr t_m
= tcg_temp_new_ptr();
2160 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
2161 desc
= deposit32(desc
, SIMD_DATA_SHIFT
+ 2, 2, high_odd
);
2163 tcg_gen_addi_ptr(t_d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2164 tcg_gen_addi_ptr(t_n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2165 tcg_gen_addi_ptr(t_m
, cpu_env
, pred_full_reg_offset(s
, a
->rm
));
2166 t_desc
= tcg_const_i32(desc
);
2168 fn(t_d
, t_n
, t_m
, t_desc
);
2170 tcg_temp_free_ptr(t_d
);
2171 tcg_temp_free_ptr(t_n
);
2172 tcg_temp_free_ptr(t_m
);
2173 tcg_temp_free_i32(t_desc
);
2177 static bool do_perm_pred2(DisasContext
*s
, arg_rr_esz
*a
, bool high_odd
,
2178 gen_helper_gvec_2
*fn
)
2180 if (!sve_access_check(s
)) {
2184 unsigned vsz
= pred_full_reg_size(s
);
2185 TCGv_ptr t_d
= tcg_temp_new_ptr();
2186 TCGv_ptr t_n
= tcg_temp_new_ptr();
2190 tcg_gen_addi_ptr(t_d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2191 tcg_gen_addi_ptr(t_n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2193 /* Predicate sizes may be smaller and cannot use simd_desc.
2194 We cannot round up, as we do elsewhere, because we need
2195 the exact size for ZIP2 and REV. We retain the style for
2196 the other helpers for consistency. */
2199 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
2200 desc
= deposit32(desc
, SIMD_DATA_SHIFT
+ 2, 2, high_odd
);
2201 t_desc
= tcg_const_i32(desc
);
2203 fn(t_d
, t_n
, t_desc
);
2205 tcg_temp_free_i32(t_desc
);
2206 tcg_temp_free_ptr(t_d
);
2207 tcg_temp_free_ptr(t_n
);
2211 static bool trans_ZIP1_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2213 return do_perm_pred3(s
, a
, 0, gen_helper_sve_zip_p
);
2216 static bool trans_ZIP2_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2218 return do_perm_pred3(s
, a
, 1, gen_helper_sve_zip_p
);
2221 static bool trans_UZP1_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2223 return do_perm_pred3(s
, a
, 0, gen_helper_sve_uzp_p
);
2226 static bool trans_UZP2_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2228 return do_perm_pred3(s
, a
, 1, gen_helper_sve_uzp_p
);
2231 static bool trans_TRN1_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2233 return do_perm_pred3(s
, a
, 0, gen_helper_sve_trn_p
);
2236 static bool trans_TRN2_p(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2238 return do_perm_pred3(s
, a
, 1, gen_helper_sve_trn_p
);
2241 static bool trans_REV_p(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
2243 return do_perm_pred2(s
, a
, 0, gen_helper_sve_rev_p
);
2246 static bool trans_PUNPKLO(DisasContext
*s
, arg_PUNPKLO
*a
, uint32_t insn
)
2248 return do_perm_pred2(s
, a
, 0, gen_helper_sve_punpk_p
);
2251 static bool trans_PUNPKHI(DisasContext
*s
, arg_PUNPKHI
*a
, uint32_t insn
)
2253 return do_perm_pred2(s
, a
, 1, gen_helper_sve_punpk_p
);
2257 *** SVE Permute - Interleaving Group
2260 static bool do_zip(DisasContext
*s
, arg_rrr_esz
*a
, bool high
)
2262 static gen_helper_gvec_3
* const fns
[4] = {
2263 gen_helper_sve_zip_b
, gen_helper_sve_zip_h
,
2264 gen_helper_sve_zip_s
, gen_helper_sve_zip_d
,
2267 if (sve_access_check(s
)) {
2268 unsigned vsz
= vec_full_reg_size(s
);
2269 unsigned high_ofs
= high
? vsz
/ 2 : 0;
2270 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
2271 vec_full_reg_offset(s
, a
->rn
) + high_ofs
,
2272 vec_full_reg_offset(s
, a
->rm
) + high_ofs
,
2273 vsz
, vsz
, 0, fns
[a
->esz
]);
2278 static bool do_zzz_data_ool(DisasContext
*s
, arg_rrr_esz
*a
, int data
,
2279 gen_helper_gvec_3
*fn
)
2281 if (sve_access_check(s
)) {
2282 unsigned vsz
= vec_full_reg_size(s
);
2283 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
2284 vec_full_reg_offset(s
, a
->rn
),
2285 vec_full_reg_offset(s
, a
->rm
),
2286 vsz
, vsz
, data
, fn
);
2291 static bool trans_ZIP1_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2293 return do_zip(s
, a
, false);
2296 static bool trans_ZIP2_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2298 return do_zip(s
, a
, true);
2301 static gen_helper_gvec_3
* const uzp_fns
[4] = {
2302 gen_helper_sve_uzp_b
, gen_helper_sve_uzp_h
,
2303 gen_helper_sve_uzp_s
, gen_helper_sve_uzp_d
,
2306 static bool trans_UZP1_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2308 return do_zzz_data_ool(s
, a
, 0, uzp_fns
[a
->esz
]);
2311 static bool trans_UZP2_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2313 return do_zzz_data_ool(s
, a
, 1 << a
->esz
, uzp_fns
[a
->esz
]);
2316 static gen_helper_gvec_3
* const trn_fns
[4] = {
2317 gen_helper_sve_trn_b
, gen_helper_sve_trn_h
,
2318 gen_helper_sve_trn_s
, gen_helper_sve_trn_d
,
2321 static bool trans_TRN1_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2323 return do_zzz_data_ool(s
, a
, 0, trn_fns
[a
->esz
]);
2326 static bool trans_TRN2_z(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
2328 return do_zzz_data_ool(s
, a
, 1 << a
->esz
, trn_fns
[a
->esz
]);
2332 *** SVE Permute Vector - Predicated Group
2335 static bool trans_COMPACT(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2337 static gen_helper_gvec_3
* const fns
[4] = {
2338 NULL
, NULL
, gen_helper_sve_compact_s
, gen_helper_sve_compact_d
2340 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2343 /* Call the helper that computes the ARM LastActiveElement pseudocode
2344 * function, scaled by the element size. This includes the not found
2345 * indication; e.g. not found for esz=3 is -8.
2347 static void find_last_active(DisasContext
*s
, TCGv_i32 ret
, int esz
, int pg
)
2349 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2350 * round up, as we do elsewhere, because we need the exact size.
2352 TCGv_ptr t_p
= tcg_temp_new_ptr();
2354 unsigned vsz
= pred_full_reg_size(s
);
2358 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, esz
);
2360 tcg_gen_addi_ptr(t_p
, cpu_env
, pred_full_reg_offset(s
, pg
));
2361 t_desc
= tcg_const_i32(desc
);
2363 gen_helper_sve_last_active_element(ret
, t_p
, t_desc
);
2365 tcg_temp_free_i32(t_desc
);
2366 tcg_temp_free_ptr(t_p
);
2369 /* Increment LAST to the offset of the next element in the vector,
2370 * wrapping around to 0.
2372 static void incr_last_active(DisasContext
*s
, TCGv_i32 last
, int esz
)
2374 unsigned vsz
= vec_full_reg_size(s
);
2376 tcg_gen_addi_i32(last
, last
, 1 << esz
);
2377 if (is_power_of_2(vsz
)) {
2378 tcg_gen_andi_i32(last
, last
, vsz
- 1);
2380 TCGv_i32 max
= tcg_const_i32(vsz
);
2381 TCGv_i32 zero
= tcg_const_i32(0);
2382 tcg_gen_movcond_i32(TCG_COND_GEU
, last
, last
, max
, zero
, last
);
2383 tcg_temp_free_i32(max
);
2384 tcg_temp_free_i32(zero
);
2388 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2389 static void wrap_last_active(DisasContext
*s
, TCGv_i32 last
, int esz
)
2391 unsigned vsz
= vec_full_reg_size(s
);
2393 if (is_power_of_2(vsz
)) {
2394 tcg_gen_andi_i32(last
, last
, vsz
- 1);
2396 TCGv_i32 max
= tcg_const_i32(vsz
- (1 << esz
));
2397 TCGv_i32 zero
= tcg_const_i32(0);
2398 tcg_gen_movcond_i32(TCG_COND_LT
, last
, last
, zero
, max
, last
);
2399 tcg_temp_free_i32(max
);
2400 tcg_temp_free_i32(zero
);
2404 /* Load an unsigned element of ESZ from BASE+OFS. */
2405 static TCGv_i64
load_esz(TCGv_ptr base
, int ofs
, int esz
)
2407 TCGv_i64 r
= tcg_temp_new_i64();
2411 tcg_gen_ld8u_i64(r
, base
, ofs
);
2414 tcg_gen_ld16u_i64(r
, base
, ofs
);
2417 tcg_gen_ld32u_i64(r
, base
, ofs
);
2420 tcg_gen_ld_i64(r
, base
, ofs
);
2423 g_assert_not_reached();
2428 /* Load an unsigned element of ESZ from RM[LAST]. */
2429 static TCGv_i64
load_last_active(DisasContext
*s
, TCGv_i32 last
,
2432 TCGv_ptr p
= tcg_temp_new_ptr();
2435 /* Convert offset into vector into offset into ENV.
2436 * The final adjustment for the vector register base
2437 * is added via constant offset to the load.
2439 #ifdef HOST_WORDS_BIGENDIAN
2440 /* Adjust for element ordering. See vec_reg_offset. */
2442 tcg_gen_xori_i32(last
, last
, 8 - (1 << esz
));
2445 tcg_gen_ext_i32_ptr(p
, last
);
2446 tcg_gen_add_ptr(p
, p
, cpu_env
);
2448 r
= load_esz(p
, vec_full_reg_offset(s
, rm
), esz
);
2449 tcg_temp_free_ptr(p
);
2454 /* Compute CLAST for a Zreg. */
2455 static bool do_clast_vector(DisasContext
*s
, arg_rprr_esz
*a
, bool before
)
2460 unsigned vsz
, esz
= a
->esz
;
2462 if (!sve_access_check(s
)) {
2466 last
= tcg_temp_local_new_i32();
2467 over
= gen_new_label();
2469 find_last_active(s
, last
, esz
, a
->pg
);
2471 /* There is of course no movcond for a 2048-bit vector,
2472 * so we must branch over the actual store.
2474 tcg_gen_brcondi_i32(TCG_COND_LT
, last
, 0, over
);
2477 incr_last_active(s
, last
, esz
);
2480 ele
= load_last_active(s
, last
, a
->rm
, esz
);
2481 tcg_temp_free_i32(last
);
2483 vsz
= vec_full_reg_size(s
);
2484 tcg_gen_gvec_dup_i64(esz
, vec_full_reg_offset(s
, a
->rd
), vsz
, vsz
, ele
);
2485 tcg_temp_free_i64(ele
);
2487 /* If this insn used MOVPRFX, we may need a second move. */
2488 if (a
->rd
!= a
->rn
) {
2489 TCGLabel
*done
= gen_new_label();
2492 gen_set_label(over
);
2493 do_mov_z(s
, a
->rd
, a
->rn
);
2495 gen_set_label(done
);
2497 gen_set_label(over
);
2502 static bool trans_CLASTA_z(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
2504 return do_clast_vector(s
, a
, false);
2507 static bool trans_CLASTB_z(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
2509 return do_clast_vector(s
, a
, true);
2512 /* Compute CLAST for a scalar. */
2513 static void do_clast_scalar(DisasContext
*s
, int esz
, int pg
, int rm
,
2514 bool before
, TCGv_i64 reg_val
)
2516 TCGv_i32 last
= tcg_temp_new_i32();
2517 TCGv_i64 ele
, cmp
, zero
;
2519 find_last_active(s
, last
, esz
, pg
);
2521 /* Extend the original value of last prior to incrementing. */
2522 cmp
= tcg_temp_new_i64();
2523 tcg_gen_ext_i32_i64(cmp
, last
);
2526 incr_last_active(s
, last
, esz
);
2529 /* The conceit here is that while last < 0 indicates not found, after
2530 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2531 * from which we can load garbage. We then discard the garbage with
2532 * a conditional move.
2534 ele
= load_last_active(s
, last
, rm
, esz
);
2535 tcg_temp_free_i32(last
);
2537 zero
= tcg_const_i64(0);
2538 tcg_gen_movcond_i64(TCG_COND_GE
, reg_val
, cmp
, zero
, ele
, reg_val
);
2540 tcg_temp_free_i64(zero
);
2541 tcg_temp_free_i64(cmp
);
2542 tcg_temp_free_i64(ele
);
2545 /* Compute CLAST for a Vreg. */
2546 static bool do_clast_fp(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2548 if (sve_access_check(s
)) {
2550 int ofs
= vec_reg_offset(s
, a
->rd
, 0, esz
);
2551 TCGv_i64 reg
= load_esz(cpu_env
, ofs
, esz
);
2553 do_clast_scalar(s
, esz
, a
->pg
, a
->rn
, before
, reg
);
2554 write_fp_dreg(s
, a
->rd
, reg
);
2555 tcg_temp_free_i64(reg
);
2560 static bool trans_CLASTA_v(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2562 return do_clast_fp(s
, a
, false);
2565 static bool trans_CLASTB_v(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2567 return do_clast_fp(s
, a
, true);
2570 /* Compute CLAST for a Xreg. */
2571 static bool do_clast_general(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2575 if (!sve_access_check(s
)) {
2579 reg
= cpu_reg(s
, a
->rd
);
2582 tcg_gen_ext8u_i64(reg
, reg
);
2585 tcg_gen_ext16u_i64(reg
, reg
);
2588 tcg_gen_ext32u_i64(reg
, reg
);
2593 g_assert_not_reached();
2596 do_clast_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
, reg
);
2600 static bool trans_CLASTA_r(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2602 return do_clast_general(s
, a
, false);
2605 static bool trans_CLASTB_r(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2607 return do_clast_general(s
, a
, true);
2610 /* Compute LAST for a scalar. */
2611 static TCGv_i64
do_last_scalar(DisasContext
*s
, int esz
,
2612 int pg
, int rm
, bool before
)
2614 TCGv_i32 last
= tcg_temp_new_i32();
2617 find_last_active(s
, last
, esz
, pg
);
2619 wrap_last_active(s
, last
, esz
);
2621 incr_last_active(s
, last
, esz
);
2624 ret
= load_last_active(s
, last
, rm
, esz
);
2625 tcg_temp_free_i32(last
);
2629 /* Compute LAST for a Vreg. */
2630 static bool do_last_fp(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2632 if (sve_access_check(s
)) {
2633 TCGv_i64 val
= do_last_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
);
2634 write_fp_dreg(s
, a
->rd
, val
);
2635 tcg_temp_free_i64(val
);
2640 static bool trans_LASTA_v(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2642 return do_last_fp(s
, a
, false);
2645 static bool trans_LASTB_v(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2647 return do_last_fp(s
, a
, true);
2650 /* Compute LAST for a Xreg. */
2651 static bool do_last_general(DisasContext
*s
, arg_rpr_esz
*a
, bool before
)
2653 if (sve_access_check(s
)) {
2654 TCGv_i64 val
= do_last_scalar(s
, a
->esz
, a
->pg
, a
->rn
, before
);
2655 tcg_gen_mov_i64(cpu_reg(s
, a
->rd
), val
);
2656 tcg_temp_free_i64(val
);
2661 static bool trans_LASTA_r(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2663 return do_last_general(s
, a
, false);
2666 static bool trans_LASTB_r(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2668 return do_last_general(s
, a
, true);
2671 static bool trans_CPY_m_r(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2673 if (sve_access_check(s
)) {
2674 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rd
, a
->pg
, cpu_reg_sp(s
, a
->rn
));
2679 static bool trans_CPY_m_v(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2681 if (sve_access_check(s
)) {
2682 int ofs
= vec_reg_offset(s
, a
->rn
, 0, a
->esz
);
2683 TCGv_i64 t
= load_esz(cpu_env
, ofs
, a
->esz
);
2684 do_cpy_m(s
, a
->esz
, a
->rd
, a
->rd
, a
->pg
, t
);
2685 tcg_temp_free_i64(t
);
2690 static bool trans_REVB(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2692 static gen_helper_gvec_3
* const fns
[4] = {
2694 gen_helper_sve_revb_h
,
2695 gen_helper_sve_revb_s
,
2696 gen_helper_sve_revb_d
,
2698 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2701 static bool trans_REVH(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2703 static gen_helper_gvec_3
* const fns
[4] = {
2706 gen_helper_sve_revh_s
,
2707 gen_helper_sve_revh_d
,
2709 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2712 static bool trans_REVW(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2714 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_revw_d
: NULL
);
2717 static bool trans_RBIT(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
2719 static gen_helper_gvec_3
* const fns
[4] = {
2720 gen_helper_sve_rbit_b
,
2721 gen_helper_sve_rbit_h
,
2722 gen_helper_sve_rbit_s
,
2723 gen_helper_sve_rbit_d
,
2725 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
2728 static bool trans_SPLICE(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
2730 if (sve_access_check(s
)) {
2731 unsigned vsz
= vec_full_reg_size(s
);
2732 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, a
->rd
),
2733 vec_full_reg_offset(s
, a
->rn
),
2734 vec_full_reg_offset(s
, a
->rm
),
2735 pred_full_reg_offset(s
, a
->pg
),
2736 vsz
, vsz
, a
->esz
, gen_helper_sve_splice
);
2742 *** SVE Integer Compare - Vectors Group
2745 static bool do_ppzz_flags(DisasContext
*s
, arg_rprr_esz
*a
,
2746 gen_helper_gvec_flags_4
*gen_fn
)
2748 TCGv_ptr pd
, zn
, zm
, pg
;
2752 if (gen_fn
== NULL
) {
2755 if (!sve_access_check(s
)) {
2759 vsz
= vec_full_reg_size(s
);
2760 t
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
2761 pd
= tcg_temp_new_ptr();
2762 zn
= tcg_temp_new_ptr();
2763 zm
= tcg_temp_new_ptr();
2764 pg
= tcg_temp_new_ptr();
2766 tcg_gen_addi_ptr(pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2767 tcg_gen_addi_ptr(zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2768 tcg_gen_addi_ptr(zm
, cpu_env
, vec_full_reg_offset(s
, a
->rm
));
2769 tcg_gen_addi_ptr(pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2771 gen_fn(t
, pd
, zn
, zm
, pg
, t
);
2773 tcg_temp_free_ptr(pd
);
2774 tcg_temp_free_ptr(zn
);
2775 tcg_temp_free_ptr(zm
);
2776 tcg_temp_free_ptr(pg
);
2780 tcg_temp_free_i32(t
);
2784 #define DO_PPZZ(NAME, name) \
2785 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2788 static gen_helper_gvec_flags_4 * const fns[4] = { \
2789 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2790 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2792 return do_ppzz_flags(s, a, fns[a->esz]); \
2795 DO_PPZZ(CMPEQ
, cmpeq
)
2796 DO_PPZZ(CMPNE
, cmpne
)
2797 DO_PPZZ(CMPGT
, cmpgt
)
2798 DO_PPZZ(CMPGE
, cmpge
)
2799 DO_PPZZ(CMPHI
, cmphi
)
2800 DO_PPZZ(CMPHS
, cmphs
)
2804 #define DO_PPZW(NAME, name) \
2805 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2808 static gen_helper_gvec_flags_4 * const fns[4] = { \
2809 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2810 gen_helper_sve_##name##_ppzw_s, NULL \
2812 return do_ppzz_flags(s, a, fns[a->esz]); \
2815 DO_PPZW(CMPEQ
, cmpeq
)
2816 DO_PPZW(CMPNE
, cmpne
)
2817 DO_PPZW(CMPGT
, cmpgt
)
2818 DO_PPZW(CMPGE
, cmpge
)
2819 DO_PPZW(CMPHI
, cmphi
)
2820 DO_PPZW(CMPHS
, cmphs
)
2821 DO_PPZW(CMPLT
, cmplt
)
2822 DO_PPZW(CMPLE
, cmple
)
2823 DO_PPZW(CMPLO
, cmplo
)
2824 DO_PPZW(CMPLS
, cmpls
)
2829 *** SVE Integer Compare - Immediate Groups
2832 static bool do_ppzi_flags(DisasContext
*s
, arg_rpri_esz
*a
,
2833 gen_helper_gvec_flags_3
*gen_fn
)
2835 TCGv_ptr pd
, zn
, pg
;
2839 if (gen_fn
== NULL
) {
2842 if (!sve_access_check(s
)) {
2846 vsz
= vec_full_reg_size(s
);
2847 t
= tcg_const_i32(simd_desc(vsz
, vsz
, a
->imm
));
2848 pd
= tcg_temp_new_ptr();
2849 zn
= tcg_temp_new_ptr();
2850 pg
= tcg_temp_new_ptr();
2852 tcg_gen_addi_ptr(pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2853 tcg_gen_addi_ptr(zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
2854 tcg_gen_addi_ptr(pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2856 gen_fn(t
, pd
, zn
, pg
, t
);
2858 tcg_temp_free_ptr(pd
);
2859 tcg_temp_free_ptr(zn
);
2860 tcg_temp_free_ptr(pg
);
2864 tcg_temp_free_i32(t
);
2868 #define DO_PPZI(NAME, name) \
2869 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2872 static gen_helper_gvec_flags_3 * const fns[4] = { \
2873 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2874 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2876 return do_ppzi_flags(s, a, fns[a->esz]); \
2879 DO_PPZI(CMPEQ
, cmpeq
)
2880 DO_PPZI(CMPNE
, cmpne
)
2881 DO_PPZI(CMPGT
, cmpgt
)
2882 DO_PPZI(CMPGE
, cmpge
)
2883 DO_PPZI(CMPHI
, cmphi
)
2884 DO_PPZI(CMPHS
, cmphs
)
2885 DO_PPZI(CMPLT
, cmplt
)
2886 DO_PPZI(CMPLE
, cmple
)
2887 DO_PPZI(CMPLO
, cmplo
)
2888 DO_PPZI(CMPLS
, cmpls
)
2893 *** SVE Partition Break Group
2896 static bool do_brk3(DisasContext
*s
, arg_rprr_s
*a
,
2897 gen_helper_gvec_4
*fn
, gen_helper_gvec_flags_4
*fn_s
)
2899 if (!sve_access_check(s
)) {
2903 unsigned vsz
= pred_full_reg_size(s
);
2905 /* Predicate sizes may be smaller and cannot use simd_desc. */
2906 TCGv_ptr d
= tcg_temp_new_ptr();
2907 TCGv_ptr n
= tcg_temp_new_ptr();
2908 TCGv_ptr m
= tcg_temp_new_ptr();
2909 TCGv_ptr g
= tcg_temp_new_ptr();
2910 TCGv_i32 t
= tcg_const_i32(vsz
- 2);
2912 tcg_gen_addi_ptr(d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2913 tcg_gen_addi_ptr(n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2914 tcg_gen_addi_ptr(m
, cpu_env
, pred_full_reg_offset(s
, a
->rm
));
2915 tcg_gen_addi_ptr(g
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2918 fn_s(t
, d
, n
, m
, g
, t
);
2923 tcg_temp_free_ptr(d
);
2924 tcg_temp_free_ptr(n
);
2925 tcg_temp_free_ptr(m
);
2926 tcg_temp_free_ptr(g
);
2927 tcg_temp_free_i32(t
);
2931 static bool do_brk2(DisasContext
*s
, arg_rpr_s
*a
,
2932 gen_helper_gvec_3
*fn
, gen_helper_gvec_flags_3
*fn_s
)
2934 if (!sve_access_check(s
)) {
2938 unsigned vsz
= pred_full_reg_size(s
);
2940 /* Predicate sizes may be smaller and cannot use simd_desc. */
2941 TCGv_ptr d
= tcg_temp_new_ptr();
2942 TCGv_ptr n
= tcg_temp_new_ptr();
2943 TCGv_ptr g
= tcg_temp_new_ptr();
2944 TCGv_i32 t
= tcg_const_i32(vsz
- 2);
2946 tcg_gen_addi_ptr(d
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
2947 tcg_gen_addi_ptr(n
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
2948 tcg_gen_addi_ptr(g
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
2951 fn_s(t
, d
, n
, g
, t
);
2956 tcg_temp_free_ptr(d
);
2957 tcg_temp_free_ptr(n
);
2958 tcg_temp_free_ptr(g
);
2959 tcg_temp_free_i32(t
);
2963 static bool trans_BRKPA(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
2965 return do_brk3(s
, a
, gen_helper_sve_brkpa
, gen_helper_sve_brkpas
);
2968 static bool trans_BRKPB(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
2970 return do_brk3(s
, a
, gen_helper_sve_brkpb
, gen_helper_sve_brkpbs
);
2973 static bool trans_BRKA_m(DisasContext
*s
, arg_rpr_s
*a
, uint32_t insn
)
2975 return do_brk2(s
, a
, gen_helper_sve_brka_m
, gen_helper_sve_brkas_m
);
2978 static bool trans_BRKB_m(DisasContext
*s
, arg_rpr_s
*a
, uint32_t insn
)
2980 return do_brk2(s
, a
, gen_helper_sve_brkb_m
, gen_helper_sve_brkbs_m
);
2983 static bool trans_BRKA_z(DisasContext
*s
, arg_rpr_s
*a
, uint32_t insn
)
2985 return do_brk2(s
, a
, gen_helper_sve_brka_z
, gen_helper_sve_brkas_z
);
2988 static bool trans_BRKB_z(DisasContext
*s
, arg_rpr_s
*a
, uint32_t insn
)
2990 return do_brk2(s
, a
, gen_helper_sve_brkb_z
, gen_helper_sve_brkbs_z
);
2993 static bool trans_BRKN(DisasContext
*s
, arg_rpr_s
*a
, uint32_t insn
)
2995 return do_brk2(s
, a
, gen_helper_sve_brkn
, gen_helper_sve_brkns
);
2999 *** SVE Predicate Count Group
3002 static void do_cntp(DisasContext
*s
, TCGv_i64 val
, int esz
, int pn
, int pg
)
3004 unsigned psz
= pred_full_reg_size(s
);
3009 tcg_gen_ld_i64(val
, cpu_env
, pred_full_reg_offset(s
, pn
));
3011 TCGv_i64 g
= tcg_temp_new_i64();
3012 tcg_gen_ld_i64(g
, cpu_env
, pred_full_reg_offset(s
, pg
));
3013 tcg_gen_and_i64(val
, val
, g
);
3014 tcg_temp_free_i64(g
);
3017 /* Reduce the pred_esz_masks value simply to reduce the
3018 * size of the code generated here.
3020 psz_mask
= MAKE_64BIT_MASK(0, psz
* 8);
3021 tcg_gen_andi_i64(val
, val
, pred_esz_masks
[esz
] & psz_mask
);
3023 tcg_gen_ctpop_i64(val
, val
);
3025 TCGv_ptr t_pn
= tcg_temp_new_ptr();
3026 TCGv_ptr t_pg
= tcg_temp_new_ptr();
3031 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, esz
);
3033 tcg_gen_addi_ptr(t_pn
, cpu_env
, pred_full_reg_offset(s
, pn
));
3034 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
3035 t_desc
= tcg_const_i32(desc
);
3037 gen_helper_sve_cntp(val
, t_pn
, t_pg
, t_desc
);
3038 tcg_temp_free_ptr(t_pn
);
3039 tcg_temp_free_ptr(t_pg
);
3040 tcg_temp_free_i32(t_desc
);
3044 static bool trans_CNTP(DisasContext
*s
, arg_CNTP
*a
, uint32_t insn
)
3046 if (sve_access_check(s
)) {
3047 do_cntp(s
, cpu_reg(s
, a
->rd
), a
->esz
, a
->rn
, a
->pg
);
3052 static bool trans_INCDECP_r(DisasContext
*s
, arg_incdec_pred
*a
,
3055 if (sve_access_check(s
)) {
3056 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3057 TCGv_i64 val
= tcg_temp_new_i64();
3059 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3061 tcg_gen_sub_i64(reg
, reg
, val
);
3063 tcg_gen_add_i64(reg
, reg
, val
);
3065 tcg_temp_free_i64(val
);
3070 static bool trans_INCDECP_z(DisasContext
*s
, arg_incdec2_pred
*a
,
3076 if (sve_access_check(s
)) {
3077 unsigned vsz
= vec_full_reg_size(s
);
3078 TCGv_i64 val
= tcg_temp_new_i64();
3079 GVecGen2sFn
*gvec_fn
= a
->d
? tcg_gen_gvec_subs
: tcg_gen_gvec_adds
;
3081 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3082 gvec_fn(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3083 vec_full_reg_offset(s
, a
->rn
), val
, vsz
, vsz
);
3088 static bool trans_SINCDECP_r_32(DisasContext
*s
, arg_incdec_pred
*a
,
3091 if (sve_access_check(s
)) {
3092 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3093 TCGv_i64 val
= tcg_temp_new_i64();
3095 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3096 do_sat_addsub_32(reg
, val
, a
->u
, a
->d
);
3101 static bool trans_SINCDECP_r_64(DisasContext
*s
, arg_incdec_pred
*a
,
3104 if (sve_access_check(s
)) {
3105 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
3106 TCGv_i64 val
= tcg_temp_new_i64();
3108 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3109 do_sat_addsub_64(reg
, val
, a
->u
, a
->d
);
3114 static bool trans_SINCDECP_z(DisasContext
*s
, arg_incdec2_pred
*a
,
3120 if (sve_access_check(s
)) {
3121 TCGv_i64 val
= tcg_temp_new_i64();
3122 do_cntp(s
, val
, a
->esz
, a
->pg
, a
->pg
);
3123 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, val
, a
->u
, a
->d
);
3129 *** SVE Integer Compare Scalars Group
3132 static bool trans_CTERM(DisasContext
*s
, arg_CTERM
*a
, uint32_t insn
)
3134 if (!sve_access_check(s
)) {
3138 TCGCond cond
= (a
->ne
? TCG_COND_NE
: TCG_COND_EQ
);
3139 TCGv_i64 rn
= read_cpu_reg(s
, a
->rn
, a
->sf
);
3140 TCGv_i64 rm
= read_cpu_reg(s
, a
->rm
, a
->sf
);
3141 TCGv_i64 cmp
= tcg_temp_new_i64();
3143 tcg_gen_setcond_i64(cond
, cmp
, rn
, rm
);
3144 tcg_gen_extrl_i64_i32(cpu_NF
, cmp
);
3145 tcg_temp_free_i64(cmp
);
3147 /* VF = !NF & !CF. */
3148 tcg_gen_xori_i32(cpu_VF
, cpu_NF
, 1);
3149 tcg_gen_andc_i32(cpu_VF
, cpu_VF
, cpu_CF
);
3151 /* Both NF and VF actually look at bit 31. */
3152 tcg_gen_neg_i32(cpu_NF
, cpu_NF
);
3153 tcg_gen_neg_i32(cpu_VF
, cpu_VF
);
3157 static bool trans_WHILE(DisasContext
*s
, arg_WHILE
*a
, uint32_t insn
)
3159 if (!sve_access_check(s
)) {
3163 TCGv_i64 op0
= read_cpu_reg(s
, a
->rn
, 1);
3164 TCGv_i64 op1
= read_cpu_reg(s
, a
->rm
, 1);
3165 TCGv_i64 t0
= tcg_temp_new_i64();
3166 TCGv_i64 t1
= tcg_temp_new_i64();
3169 unsigned desc
, vsz
= vec_full_reg_size(s
);
3174 tcg_gen_ext32u_i64(op0
, op0
);
3175 tcg_gen_ext32u_i64(op1
, op1
);
3177 tcg_gen_ext32s_i64(op0
, op0
);
3178 tcg_gen_ext32s_i64(op1
, op1
);
3182 /* For the helper, compress the different conditions into a computation
3183 * of how many iterations for which the condition is true.
3185 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3186 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3187 * aren't that large, so any value >= predicate size is sufficient.
3189 tcg_gen_sub_i64(t0
, op1
, op0
);
3191 /* t0 = MIN(op1 - op0, vsz). */
3192 tcg_gen_movi_i64(t1
, vsz
);
3193 tcg_gen_umin_i64(t0
, t0
, t1
);
3195 /* Equality means one more iteration. */
3196 tcg_gen_addi_i64(t0
, t0
, 1);
3199 /* t0 = (condition true ? t0 : 0). */
3201 ? (a
->eq
? TCG_COND_LEU
: TCG_COND_LTU
)
3202 : (a
->eq
? TCG_COND_LE
: TCG_COND_LT
));
3203 tcg_gen_movi_i64(t1
, 0);
3204 tcg_gen_movcond_i64(cond
, t0
, op0
, op1
, t0
, t1
);
3206 t2
= tcg_temp_new_i32();
3207 tcg_gen_extrl_i64_i32(t2
, t0
);
3208 tcg_temp_free_i64(t0
);
3209 tcg_temp_free_i64(t1
);
3211 desc
= (vsz
/ 8) - 2;
3212 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
3213 t3
= tcg_const_i32(desc
);
3215 ptr
= tcg_temp_new_ptr();
3216 tcg_gen_addi_ptr(ptr
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
3218 gen_helper_sve_while(t2
, ptr
, t2
, t3
);
3221 tcg_temp_free_ptr(ptr
);
3222 tcg_temp_free_i32(t2
);
3223 tcg_temp_free_i32(t3
);
3228 *** SVE Integer Wide Immediate - Unpredicated Group
3231 static bool trans_FDUP(DisasContext
*s
, arg_FDUP
*a
, uint32_t insn
)
3236 if (sve_access_check(s
)) {
3237 unsigned vsz
= vec_full_reg_size(s
);
3238 int dofs
= vec_full_reg_offset(s
, a
->rd
);
3241 /* Decode the VFP immediate. */
3242 imm
= vfp_expand_imm(a
->esz
, a
->imm
);
3243 imm
= dup_const(a
->esz
, imm
);
3245 tcg_gen_gvec_dup64i(dofs
, vsz
, vsz
, imm
);
3250 static bool trans_DUP_i(DisasContext
*s
, arg_DUP_i
*a
, uint32_t insn
)
3252 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
3255 if (sve_access_check(s
)) {
3256 unsigned vsz
= vec_full_reg_size(s
);
3257 int dofs
= vec_full_reg_offset(s
, a
->rd
);
3259 tcg_gen_gvec_dup64i(dofs
, vsz
, vsz
, dup_const(a
->esz
, a
->imm
));
3264 static bool trans_ADD_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3266 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
3269 if (sve_access_check(s
)) {
3270 unsigned vsz
= vec_full_reg_size(s
);
3271 tcg_gen_gvec_addi(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3272 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
3277 static bool trans_SUB_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3280 return trans_ADD_zzi(s
, a
, insn
);
3283 static bool trans_SUBR_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3285 static const GVecGen2s op
[4] = {
3286 { .fni8
= tcg_gen_vec_sub8_i64
,
3287 .fniv
= tcg_gen_sub_vec
,
3288 .fno
= gen_helper_sve_subri_b
,
3289 .opc
= INDEX_op_sub_vec
,
3291 .scalar_first
= true },
3292 { .fni8
= tcg_gen_vec_sub16_i64
,
3293 .fniv
= tcg_gen_sub_vec
,
3294 .fno
= gen_helper_sve_subri_h
,
3295 .opc
= INDEX_op_sub_vec
,
3297 .scalar_first
= true },
3298 { .fni4
= tcg_gen_sub_i32
,
3299 .fniv
= tcg_gen_sub_vec
,
3300 .fno
= gen_helper_sve_subri_s
,
3301 .opc
= INDEX_op_sub_vec
,
3303 .scalar_first
= true },
3304 { .fni8
= tcg_gen_sub_i64
,
3305 .fniv
= tcg_gen_sub_vec
,
3306 .fno
= gen_helper_sve_subri_d
,
3307 .opc
= INDEX_op_sub_vec
,
3308 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
3310 .scalar_first
= true }
3313 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
3316 if (sve_access_check(s
)) {
3317 unsigned vsz
= vec_full_reg_size(s
);
3318 TCGv_i64 c
= tcg_const_i64(a
->imm
);
3319 tcg_gen_gvec_2s(vec_full_reg_offset(s
, a
->rd
),
3320 vec_full_reg_offset(s
, a
->rn
),
3321 vsz
, vsz
, c
, &op
[a
->esz
]);
3322 tcg_temp_free_i64(c
);
3327 static bool trans_MUL_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3329 if (sve_access_check(s
)) {
3330 unsigned vsz
= vec_full_reg_size(s
);
3331 tcg_gen_gvec_muli(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
3332 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
3337 static bool do_zzi_sat(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
,
3340 if (a
->esz
== 0 && extract32(insn
, 13, 1)) {
3343 if (sve_access_check(s
)) {
3344 TCGv_i64 val
= tcg_const_i64(a
->imm
);
3345 do_sat_addsub_vec(s
, a
->esz
, a
->rd
, a
->rn
, val
, u
, d
);
3346 tcg_temp_free_i64(val
);
3351 static bool trans_SQADD_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3353 return do_zzi_sat(s
, a
, insn
, false, false);
3356 static bool trans_UQADD_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3358 return do_zzi_sat(s
, a
, insn
, true, false);
3361 static bool trans_SQSUB_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3363 return do_zzi_sat(s
, a
, insn
, false, true);
3366 static bool trans_UQSUB_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
3368 return do_zzi_sat(s
, a
, insn
, true, true);
3371 static bool do_zzi_ool(DisasContext
*s
, arg_rri_esz
*a
, gen_helper_gvec_2i
*fn
)
3373 if (sve_access_check(s
)) {
3374 unsigned vsz
= vec_full_reg_size(s
);
3375 TCGv_i64 c
= tcg_const_i64(a
->imm
);
3377 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s
, a
->rd
),
3378 vec_full_reg_offset(s
, a
->rn
),
3379 c
, vsz
, vsz
, 0, fn
);
3380 tcg_temp_free_i64(c
);
3385 #define DO_ZZI(NAME, name) \
3386 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3389 static gen_helper_gvec_2i * const fns[4] = { \
3390 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3391 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3393 return do_zzi_ool(s, a, fns[a->esz]); \
3404 *** SVE Floating Point Multiply-Add Indexed Group
3407 static bool trans_FMLA_zzxz(DisasContext
*s
, arg_FMLA_zzxz
*a
, uint32_t insn
)
3409 static gen_helper_gvec_4_ptr
* const fns
[3] = {
3410 gen_helper_gvec_fmla_idx_h
,
3411 gen_helper_gvec_fmla_idx_s
,
3412 gen_helper_gvec_fmla_idx_d
,
3415 if (sve_access_check(s
)) {
3416 unsigned vsz
= vec_full_reg_size(s
);
3417 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3418 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, a
->rd
),
3419 vec_full_reg_offset(s
, a
->rn
),
3420 vec_full_reg_offset(s
, a
->rm
),
3421 vec_full_reg_offset(s
, a
->ra
),
3422 status
, vsz
, vsz
, (a
->index
<< 1) | a
->sub
,
3424 tcg_temp_free_ptr(status
);
3430 *** SVE Floating Point Multiply Indexed Group
3433 static bool trans_FMUL_zzx(DisasContext
*s
, arg_FMUL_zzx
*a
, uint32_t insn
)
3435 static gen_helper_gvec_3_ptr
* const fns
[3] = {
3436 gen_helper_gvec_fmul_idx_h
,
3437 gen_helper_gvec_fmul_idx_s
,
3438 gen_helper_gvec_fmul_idx_d
,
3441 if (sve_access_check(s
)) {
3442 unsigned vsz
= vec_full_reg_size(s
);
3443 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3444 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3445 vec_full_reg_offset(s
, a
->rn
),
3446 vec_full_reg_offset(s
, a
->rm
),
3447 status
, vsz
, vsz
, a
->index
, fns
[a
->esz
- 1]);
3448 tcg_temp_free_ptr(status
);
3454 *** SVE Floating Point Accumulating Reduction Group
3457 static bool trans_FADDA(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
3459 typedef void fadda_fn(TCGv_i64
, TCGv_i64
, TCGv_ptr
,
3460 TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
3461 static fadda_fn
* const fns
[3] = {
3462 gen_helper_sve_fadda_h
,
3463 gen_helper_sve_fadda_s
,
3464 gen_helper_sve_fadda_d
,
3466 unsigned vsz
= vec_full_reg_size(s
);
3467 TCGv_ptr t_rm
, t_pg
, t_fpst
;
3474 if (!sve_access_check(s
)) {
3478 t_val
= load_esz(cpu_env
, vec_reg_offset(s
, a
->rn
, 0, a
->esz
), a
->esz
);
3479 t_rm
= tcg_temp_new_ptr();
3480 t_pg
= tcg_temp_new_ptr();
3481 tcg_gen_addi_ptr(t_rm
, cpu_env
, vec_full_reg_offset(s
, a
->rm
));
3482 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
3483 t_fpst
= get_fpstatus_ptr(a
->esz
== MO_16
);
3484 t_desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
3486 fns
[a
->esz
- 1](t_val
, t_val
, t_rm
, t_pg
, t_fpst
, t_desc
);
3488 tcg_temp_free_i32(t_desc
);
3489 tcg_temp_free_ptr(t_fpst
);
3490 tcg_temp_free_ptr(t_pg
);
3491 tcg_temp_free_ptr(t_rm
);
3493 write_fp_dreg(s
, a
->rd
, t_val
);
3494 tcg_temp_free_i64(t_val
);
3499 *** SVE Floating Point Arithmetic - Unpredicated Group
3502 static bool do_zzz_fp(DisasContext
*s
, arg_rrr_esz
*a
,
3503 gen_helper_gvec_3_ptr
*fn
)
3508 if (sve_access_check(s
)) {
3509 unsigned vsz
= vec_full_reg_size(s
);
3510 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3511 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, a
->rd
),
3512 vec_full_reg_offset(s
, a
->rn
),
3513 vec_full_reg_offset(s
, a
->rm
),
3514 status
, vsz
, vsz
, 0, fn
);
3515 tcg_temp_free_ptr(status
);
3521 #define DO_FP3(NAME, name) \
3522 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3524 static gen_helper_gvec_3_ptr * const fns[4] = { \
3525 NULL, gen_helper_gvec_##name##_h, \
3526 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3528 return do_zzz_fp(s, a, fns[a->esz]); \
3531 DO_FP3(FADD_zzz
, fadd
)
3532 DO_FP3(FSUB_zzz
, fsub
)
3533 DO_FP3(FMUL_zzz
, fmul
)
3534 DO_FP3(FTSMUL
, ftsmul
)
3535 DO_FP3(FRECPS
, recps
)
3536 DO_FP3(FRSQRTS
, rsqrts
)
3541 *** SVE Floating Point Arithmetic - Predicated Group
3544 static bool do_zpzz_fp(DisasContext
*s
, arg_rprr_esz
*a
,
3545 gen_helper_gvec_4_ptr
*fn
)
3550 if (sve_access_check(s
)) {
3551 unsigned vsz
= vec_full_reg_size(s
);
3552 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3553 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, a
->rd
),
3554 vec_full_reg_offset(s
, a
->rn
),
3555 vec_full_reg_offset(s
, a
->rm
),
3556 pred_full_reg_offset(s
, a
->pg
),
3557 status
, vsz
, vsz
, 0, fn
);
3558 tcg_temp_free_ptr(status
);
3563 #define DO_FP3(NAME, name) \
3564 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3566 static gen_helper_gvec_4_ptr * const fns[4] = { \
3567 NULL, gen_helper_sve_##name##_h, \
3568 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3570 return do_zpzz_fp(s, a, fns[a->esz]); \
3573 DO_FP3(FADD_zpzz
, fadd
)
3574 DO_FP3(FSUB_zpzz
, fsub
)
3575 DO_FP3(FMUL_zpzz
, fmul
)
3576 DO_FP3(FMIN_zpzz
, fmin
)
3577 DO_FP3(FMAX_zpzz
, fmax
)
3578 DO_FP3(FMINNM_zpzz
, fminnum
)
3579 DO_FP3(FMAXNM_zpzz
, fmaxnum
)
3581 DO_FP3(FSCALE
, fscalbn
)
3583 DO_FP3(FMULX
, fmulx
)
3587 typedef void gen_helper_sve_fp2scalar(TCGv_ptr
, TCGv_ptr
, TCGv_ptr
,
3588 TCGv_i64
, TCGv_ptr
, TCGv_i32
);
3590 static void do_fp_scalar(DisasContext
*s
, int zd
, int zn
, int pg
, bool is_fp16
,
3591 TCGv_i64 scalar
, gen_helper_sve_fp2scalar
*fn
)
3593 unsigned vsz
= vec_full_reg_size(s
);
3594 TCGv_ptr t_zd
, t_zn
, t_pg
, status
;
3597 t_zd
= tcg_temp_new_ptr();
3598 t_zn
= tcg_temp_new_ptr();
3599 t_pg
= tcg_temp_new_ptr();
3600 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, zd
));
3601 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, zn
));
3602 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
3604 status
= get_fpstatus_ptr(is_fp16
);
3605 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
3606 fn(t_zd
, t_zn
, t_pg
, scalar
, status
, desc
);
3608 tcg_temp_free_i32(desc
);
3609 tcg_temp_free_ptr(status
);
3610 tcg_temp_free_ptr(t_pg
);
3611 tcg_temp_free_ptr(t_zn
);
3612 tcg_temp_free_ptr(t_zd
);
3615 static void do_fp_imm(DisasContext
*s
, arg_rpri_esz
*a
, uint64_t imm
,
3616 gen_helper_sve_fp2scalar
*fn
)
3618 TCGv_i64 temp
= tcg_const_i64(imm
);
3619 do_fp_scalar(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
, temp
, fn
);
3620 tcg_temp_free_i64(temp
);
3623 #define DO_FP_IMM(NAME, name, const0, const1) \
3624 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a, \
3627 static gen_helper_sve_fp2scalar * const fns[3] = { \
3628 gen_helper_sve_##name##_h, \
3629 gen_helper_sve_##name##_s, \
3630 gen_helper_sve_##name##_d \
3632 static uint64_t const val[3][2] = { \
3633 { float16_##const0, float16_##const1 }, \
3634 { float32_##const0, float32_##const1 }, \
3635 { float64_##const0, float64_##const1 }, \
3637 if (a->esz == 0) { \
3640 if (sve_access_check(s)) { \
3641 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3646 #define float16_two make_float16(0x4000)
3647 #define float32_two make_float32(0x40000000)
3648 #define float64_two make_float64(0x4000000000000000ULL)
3650 DO_FP_IMM(FADD
, fadds
, half
, one
)
3651 DO_FP_IMM(FSUB
, fsubs
, half
, one
)
3652 DO_FP_IMM(FMUL
, fmuls
, half
, two
)
3653 DO_FP_IMM(FSUBR
, fsubrs
, half
, one
)
3654 DO_FP_IMM(FMAXNM
, fmaxnms
, zero
, one
)
3655 DO_FP_IMM(FMINNM
, fminnms
, zero
, one
)
3656 DO_FP_IMM(FMAX
, fmaxs
, zero
, one
)
3657 DO_FP_IMM(FMIN
, fmins
, zero
, one
)
3661 static bool do_fp_cmp(DisasContext
*s
, arg_rprr_esz
*a
,
3662 gen_helper_gvec_4_ptr
*fn
)
3667 if (sve_access_check(s
)) {
3668 unsigned vsz
= vec_full_reg_size(s
);
3669 TCGv_ptr status
= get_fpstatus_ptr(a
->esz
== MO_16
);
3670 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s
, a
->rd
),
3671 vec_full_reg_offset(s
, a
->rn
),
3672 vec_full_reg_offset(s
, a
->rm
),
3673 pred_full_reg_offset(s
, a
->pg
),
3674 status
, vsz
, vsz
, 0, fn
);
3675 tcg_temp_free_ptr(status
);
3680 #define DO_FPCMP(NAME, name) \
3681 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
3684 static gen_helper_gvec_4_ptr * const fns[4] = { \
3685 NULL, gen_helper_sve_##name##_h, \
3686 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3688 return do_fp_cmp(s, a, fns[a->esz]); \
3691 DO_FPCMP(FCMGE
, fcmge
)
3692 DO_FPCMP(FCMGT
, fcmgt
)
3693 DO_FPCMP(FCMEQ
, fcmeq
)
3694 DO_FPCMP(FCMNE
, fcmne
)
3695 DO_FPCMP(FCMUO
, fcmuo
)
3696 DO_FPCMP(FACGE
, facge
)
3697 DO_FPCMP(FACGT
, facgt
)
3701 typedef void gen_helper_sve_fmla(TCGv_env
, TCGv_ptr
, TCGv_i32
);
3703 static bool do_fmla(DisasContext
*s
, arg_rprrr_esz
*a
, gen_helper_sve_fmla
*fn
)
3708 if (!sve_access_check(s
)) {
3712 unsigned vsz
= vec_full_reg_size(s
);
3715 TCGv_ptr pg
= tcg_temp_new_ptr();
3717 /* We would need 7 operands to pass these arguments "properly".
3718 * So we encode all the register numbers into the descriptor.
3720 desc
= deposit32(a
->rd
, 5, 5, a
->rn
);
3721 desc
= deposit32(desc
, 10, 5, a
->rm
);
3722 desc
= deposit32(desc
, 15, 5, a
->ra
);
3723 desc
= simd_desc(vsz
, vsz
, desc
);
3725 t_desc
= tcg_const_i32(desc
);
3726 tcg_gen_addi_ptr(pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
3727 fn(cpu_env
, pg
, t_desc
);
3728 tcg_temp_free_i32(t_desc
);
3729 tcg_temp_free_ptr(pg
);
3733 #define DO_FMLA(NAME, name) \
3734 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3736 static gen_helper_sve_fmla * const fns[4] = { \
3737 NULL, gen_helper_sve_##name##_h, \
3738 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3740 return do_fmla(s, a, fns[a->esz]); \
3743 DO_FMLA(FMLA_zpzzz
, fmla_zpzzz
)
3744 DO_FMLA(FMLS_zpzzz
, fmls_zpzzz
)
3745 DO_FMLA(FNMLA_zpzzz
, fnmla_zpzzz
)
3746 DO_FMLA(FNMLS_zpzzz
, fnmls_zpzzz
)
3751 *** SVE Floating Point Unary Operations Predicated Group
3754 static bool do_zpz_ptr(DisasContext
*s
, int rd
, int rn
, int pg
,
3755 bool is_fp16
, gen_helper_gvec_3_ptr
*fn
)
3757 if (sve_access_check(s
)) {
3758 unsigned vsz
= vec_full_reg_size(s
);
3759 TCGv_ptr status
= get_fpstatus_ptr(is_fp16
);
3760 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, rd
),
3761 vec_full_reg_offset(s
, rn
),
3762 pred_full_reg_offset(s
, pg
),
3763 status
, vsz
, vsz
, 0, fn
);
3764 tcg_temp_free_ptr(status
);
3769 static bool trans_SCVTF_hh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3771 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_scvt_hh
);
3774 static bool trans_SCVTF_sh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3776 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_scvt_sh
);
3779 static bool trans_SCVTF_dh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3781 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_scvt_dh
);
3784 static bool trans_SCVTF_ss(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3786 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_ss
);
3789 static bool trans_SCVTF_ds(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3791 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_ds
);
3794 static bool trans_SCVTF_sd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3796 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_sd
);
3799 static bool trans_SCVTF_dd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3801 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_scvt_dd
);
3804 static bool trans_UCVTF_hh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3806 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_ucvt_hh
);
3809 static bool trans_UCVTF_sh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3811 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_ucvt_sh
);
3814 static bool trans_UCVTF_dh(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3816 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, true, gen_helper_sve_ucvt_dh
);
3819 static bool trans_UCVTF_ss(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3821 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_ss
);
3824 static bool trans_UCVTF_ds(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3826 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_ds
);
3829 static bool trans_UCVTF_sd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3831 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_sd
);
3834 static bool trans_UCVTF_dd(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
3836 return do_zpz_ptr(s
, a
->rd
, a
->rn
, a
->pg
, false, gen_helper_sve_ucvt_dd
);
3840 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3843 /* Subroutine loading a vector register at VOFS of LEN bytes.
3844 * The load should begin at the address Rn + IMM.
3847 static void do_ldr(DisasContext
*s
, uint32_t vofs
, uint32_t len
,
3850 uint32_t len_align
= QEMU_ALIGN_DOWN(len
, 8);
3851 uint32_t len_remain
= len
% 8;
3852 uint32_t nparts
= len
/ 8 + ctpop8(len_remain
);
3853 int midx
= get_mem_index(s
);
3854 TCGv_i64 addr
, t0
, t1
;
3856 addr
= tcg_temp_new_i64();
3857 t0
= tcg_temp_new_i64();
3859 /* Note that unpredicated load/store of vector/predicate registers
3860 * are defined as a stream of bytes, which equates to little-endian
3861 * operations on larger quantities. There is no nice way to force
3862 * a little-endian load for aarch64_be-linux-user out of line.
3864 * Attempt to keep code expansion to a minimum by limiting the
3865 * amount of unrolling done.
3870 for (i
= 0; i
< len_align
; i
+= 8) {
3871 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ i
);
3872 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEQ
);
3873 tcg_gen_st_i64(t0
, cpu_env
, vofs
+ i
);
3876 TCGLabel
*loop
= gen_new_label();
3877 TCGv_ptr tp
, i
= tcg_const_local_ptr(0);
3879 gen_set_label(loop
);
3881 /* Minimize the number of local temps that must be re-read from
3882 * the stack each iteration. Instead, re-compute values other
3883 * than the loop counter.
3885 tp
= tcg_temp_new_ptr();
3886 tcg_gen_addi_ptr(tp
, i
, imm
);
3887 tcg_gen_extu_ptr_i64(addr
, tp
);
3888 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, rn
));
3890 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEQ
);
3892 tcg_gen_add_ptr(tp
, cpu_env
, i
);
3893 tcg_gen_addi_ptr(i
, i
, 8);
3894 tcg_gen_st_i64(t0
, tp
, vofs
);
3895 tcg_temp_free_ptr(tp
);
3897 tcg_gen_brcondi_ptr(TCG_COND_LTU
, i
, len_align
, loop
);
3898 tcg_temp_free_ptr(i
);
3901 /* Predicate register loads can be any multiple of 2.
3902 * Note that we still store the entire 64-bit unit into cpu_env.
3905 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ len_align
);
3907 switch (len_remain
) {
3911 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LE
| ctz32(len_remain
));
3915 t1
= tcg_temp_new_i64();
3916 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEUL
);
3917 tcg_gen_addi_i64(addr
, addr
, 4);
3918 tcg_gen_qemu_ld_i64(t1
, addr
, midx
, MO_LEUW
);
3919 tcg_gen_deposit_i64(t0
, t0
, t1
, 32, 32);
3920 tcg_temp_free_i64(t1
);
3924 g_assert_not_reached();
3926 tcg_gen_st_i64(t0
, cpu_env
, vofs
+ len_align
);
3928 tcg_temp_free_i64(addr
);
3929 tcg_temp_free_i64(t0
);
3932 /* Similarly for stores. */
3933 static void do_str(DisasContext
*s
, uint32_t vofs
, uint32_t len
,
3936 uint32_t len_align
= QEMU_ALIGN_DOWN(len
, 8);
3937 uint32_t len_remain
= len
% 8;
3938 uint32_t nparts
= len
/ 8 + ctpop8(len_remain
);
3939 int midx
= get_mem_index(s
);
3942 addr
= tcg_temp_new_i64();
3943 t0
= tcg_temp_new_i64();
3945 /* Note that unpredicated load/store of vector/predicate registers
3946 * are defined as a stream of bytes, which equates to little-endian
3947 * operations on larger quantities. There is no nice way to force
3948 * a little-endian store for aarch64_be-linux-user out of line.
3950 * Attempt to keep code expansion to a minimum by limiting the
3951 * amount of unrolling done.
3956 for (i
= 0; i
< len_align
; i
+= 8) {
3957 tcg_gen_ld_i64(t0
, cpu_env
, vofs
+ i
);
3958 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ i
);
3959 tcg_gen_qemu_st_i64(t0
, addr
, midx
, MO_LEQ
);
3962 TCGLabel
*loop
= gen_new_label();
3963 TCGv_ptr t2
, i
= tcg_const_local_ptr(0);
3965 gen_set_label(loop
);
3967 t2
= tcg_temp_new_ptr();
3968 tcg_gen_add_ptr(t2
, cpu_env
, i
);
3969 tcg_gen_ld_i64(t0
, t2
, vofs
);
3971 /* Minimize the number of local temps that must be re-read from
3972 * the stack each iteration. Instead, re-compute values other
3973 * than the loop counter.
3975 tcg_gen_addi_ptr(t2
, i
, imm
);
3976 tcg_gen_extu_ptr_i64(addr
, t2
);
3977 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, rn
));
3978 tcg_temp_free_ptr(t2
);
3980 tcg_gen_qemu_st_i64(t0
, addr
, midx
, MO_LEQ
);
3982 tcg_gen_addi_ptr(i
, i
, 8);
3984 tcg_gen_brcondi_ptr(TCG_COND_LTU
, i
, len_align
, loop
);
3985 tcg_temp_free_ptr(i
);
3988 /* Predicate register stores can be any multiple of 2. */
3990 tcg_gen_ld_i64(t0
, cpu_env
, vofs
+ len_align
);
3991 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ len_align
);
3993 switch (len_remain
) {
3997 tcg_gen_qemu_st_i64(t0
, addr
, midx
, MO_LE
| ctz32(len_remain
));
4001 tcg_gen_qemu_st_i64(t0
, addr
, midx
, MO_LEUL
);
4002 tcg_gen_addi_i64(addr
, addr
, 4);
4003 tcg_gen_shri_i64(t0
, t0
, 32);
4004 tcg_gen_qemu_st_i64(t0
, addr
, midx
, MO_LEUW
);
4008 g_assert_not_reached();
4011 tcg_temp_free_i64(addr
);
4012 tcg_temp_free_i64(t0
);
4015 static bool trans_LDR_zri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
4017 if (sve_access_check(s
)) {
4018 int size
= vec_full_reg_size(s
);
4019 int off
= vec_full_reg_offset(s
, a
->rd
);
4020 do_ldr(s
, off
, size
, a
->rn
, a
->imm
* size
);
4025 static bool trans_LDR_pri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
4027 if (sve_access_check(s
)) {
4028 int size
= pred_full_reg_size(s
);
4029 int off
= pred_full_reg_offset(s
, a
->rd
);
4030 do_ldr(s
, off
, size
, a
->rn
, a
->imm
* size
);
4035 static bool trans_STR_zri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
4037 if (sve_access_check(s
)) {
4038 int size
= vec_full_reg_size(s
);
4039 int off
= vec_full_reg_offset(s
, a
->rd
);
4040 do_str(s
, off
, size
, a
->rn
, a
->imm
* size
);
4045 static bool trans_STR_pri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
4047 if (sve_access_check(s
)) {
4048 int size
= pred_full_reg_size(s
);
4049 int off
= pred_full_reg_offset(s
, a
->rd
);
4050 do_str(s
, off
, size
, a
->rn
, a
->imm
* size
);
/*
 *** SVE Memory - Contiguous Load Group
 */
4059 /* The memory mode of the dtype. */
4060 static const TCGMemOp dtype_mop
[16] = {
4061 MO_UB
, MO_UB
, MO_UB
, MO_UB
,
4062 MO_SL
, MO_UW
, MO_UW
, MO_UW
,
4063 MO_SW
, MO_SW
, MO_UL
, MO_UL
,
4064 MO_SB
, MO_SB
, MO_SB
, MO_Q
4067 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4069 /* The vector element size of dtype. */
4070 static const uint8_t dtype_esz
[16] = {
4077 static void do_mem_zpa(DisasContext
*s
, int zt
, int pg
, TCGv_i64 addr
,
4078 gen_helper_gvec_mem
*fn
)
4080 unsigned vsz
= vec_full_reg_size(s
);
4084 /* For e.g. LD4, there are not enough arguments to pass all 4
4085 * registers as pointers, so encode the regno into the data field.
4086 * For consistency, do this even for LD1.
4088 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, zt
));
4089 t_pg
= tcg_temp_new_ptr();
4091 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
4092 fn(cpu_env
, t_pg
, addr
, desc
);
4094 tcg_temp_free_ptr(t_pg
);
4095 tcg_temp_free_i32(desc
);
4098 static void do_ld_zpa(DisasContext
*s
, int zt
, int pg
,
4099 TCGv_i64 addr
, int dtype
, int nreg
)
4101 static gen_helper_gvec_mem
* const fns
[16][4] = {
4102 { gen_helper_sve_ld1bb_r
, gen_helper_sve_ld2bb_r
,
4103 gen_helper_sve_ld3bb_r
, gen_helper_sve_ld4bb_r
},
4104 { gen_helper_sve_ld1bhu_r
, NULL
, NULL
, NULL
},
4105 { gen_helper_sve_ld1bsu_r
, NULL
, NULL
, NULL
},
4106 { gen_helper_sve_ld1bdu_r
, NULL
, NULL
, NULL
},
4108 { gen_helper_sve_ld1sds_r
, NULL
, NULL
, NULL
},
4109 { gen_helper_sve_ld1hh_r
, gen_helper_sve_ld2hh_r
,
4110 gen_helper_sve_ld3hh_r
, gen_helper_sve_ld4hh_r
},
4111 { gen_helper_sve_ld1hsu_r
, NULL
, NULL
, NULL
},
4112 { gen_helper_sve_ld1hdu_r
, NULL
, NULL
, NULL
},
4114 { gen_helper_sve_ld1hds_r
, NULL
, NULL
, NULL
},
4115 { gen_helper_sve_ld1hss_r
, NULL
, NULL
, NULL
},
4116 { gen_helper_sve_ld1ss_r
, gen_helper_sve_ld2ss_r
,
4117 gen_helper_sve_ld3ss_r
, gen_helper_sve_ld4ss_r
},
4118 { gen_helper_sve_ld1sdu_r
, NULL
, NULL
, NULL
},
4120 { gen_helper_sve_ld1bds_r
, NULL
, NULL
, NULL
},
4121 { gen_helper_sve_ld1bss_r
, NULL
, NULL
, NULL
},
4122 { gen_helper_sve_ld1bhs_r
, NULL
, NULL
, NULL
},
4123 { gen_helper_sve_ld1dd_r
, gen_helper_sve_ld2dd_r
,
4124 gen_helper_sve_ld3dd_r
, gen_helper_sve_ld4dd_r
},
4126 gen_helper_gvec_mem
*fn
= fns
[dtype
][nreg
];
4128 /* While there are holes in the table, they are not
4129 * accessible via the instruction encoding.
4132 do_mem_zpa(s
, zt
, pg
, addr
, fn
);
4135 static bool trans_LD_zprr(DisasContext
*s
, arg_rprr_load
*a
, uint32_t insn
)
4140 if (sve_access_check(s
)) {
4141 TCGv_i64 addr
= new_tmp_a64(s
);
4142 tcg_gen_muli_i64(addr
, cpu_reg(s
, a
->rm
),
4143 (a
->nreg
+ 1) << dtype_msz(a
->dtype
));
4144 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
4145 do_ld_zpa(s
, a
->rd
, a
->pg
, addr
, a
->dtype
, a
->nreg
);
4150 static bool trans_LD_zpri(DisasContext
*s
, arg_rpri_load
*a
, uint32_t insn
)
4152 if (sve_access_check(s
)) {
4153 int vsz
= vec_full_reg_size(s
);
4154 int elements
= vsz
>> dtype_esz
[a
->dtype
];
4155 TCGv_i64 addr
= new_tmp_a64(s
);
4157 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
),
4158 (a
->imm
* elements
* (a
->nreg
+ 1))
4159 << dtype_msz(a
->dtype
));
4160 do_ld_zpa(s
, a
->rd
, a
->pg
, addr
, a
->dtype
, a
->nreg
);
4165 static bool trans_LDFF1_zprr(DisasContext
*s
, arg_rprr_load
*a
, uint32_t insn
)
4167 static gen_helper_gvec_mem
* const fns
[16] = {
4168 gen_helper_sve_ldff1bb_r
,
4169 gen_helper_sve_ldff1bhu_r
,
4170 gen_helper_sve_ldff1bsu_r
,
4171 gen_helper_sve_ldff1bdu_r
,
4173 gen_helper_sve_ldff1sds_r
,
4174 gen_helper_sve_ldff1hh_r
,
4175 gen_helper_sve_ldff1hsu_r
,
4176 gen_helper_sve_ldff1hdu_r
,
4178 gen_helper_sve_ldff1hds_r
,
4179 gen_helper_sve_ldff1hss_r
,
4180 gen_helper_sve_ldff1ss_r
,
4181 gen_helper_sve_ldff1sdu_r
,
4183 gen_helper_sve_ldff1bds_r
,
4184 gen_helper_sve_ldff1bss_r
,
4185 gen_helper_sve_ldff1bhs_r
,
4186 gen_helper_sve_ldff1dd_r
,
4189 if (sve_access_check(s
)) {
4190 TCGv_i64 addr
= new_tmp_a64(s
);
4191 tcg_gen_shli_i64(addr
, cpu_reg(s
, a
->rm
), dtype_msz(a
->dtype
));
4192 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
4193 do_mem_zpa(s
, a
->rd
, a
->pg
, addr
, fns
[a
->dtype
]);
4198 static bool trans_LDNF1_zpri(DisasContext
*s
, arg_rpri_load
*a
, uint32_t insn
)
4200 static gen_helper_gvec_mem
* const fns
[16] = {
4201 gen_helper_sve_ldnf1bb_r
,
4202 gen_helper_sve_ldnf1bhu_r
,
4203 gen_helper_sve_ldnf1bsu_r
,
4204 gen_helper_sve_ldnf1bdu_r
,
4206 gen_helper_sve_ldnf1sds_r
,
4207 gen_helper_sve_ldnf1hh_r
,
4208 gen_helper_sve_ldnf1hsu_r
,
4209 gen_helper_sve_ldnf1hdu_r
,
4211 gen_helper_sve_ldnf1hds_r
,
4212 gen_helper_sve_ldnf1hss_r
,
4213 gen_helper_sve_ldnf1ss_r
,
4214 gen_helper_sve_ldnf1sdu_r
,
4216 gen_helper_sve_ldnf1bds_r
,
4217 gen_helper_sve_ldnf1bss_r
,
4218 gen_helper_sve_ldnf1bhs_r
,
4219 gen_helper_sve_ldnf1dd_r
,
4222 if (sve_access_check(s
)) {
4223 int vsz
= vec_full_reg_size(s
);
4224 int elements
= vsz
>> dtype_esz
[a
->dtype
];
4225 int off
= (a
->imm
* elements
) << dtype_msz(a
->dtype
);
4226 TCGv_i64 addr
= new_tmp_a64(s
);
4228 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
), off
);
4229 do_mem_zpa(s
, a
->rd
, a
->pg
, addr
, fns
[a
->dtype
]);
4234 static void do_ldrq(DisasContext
*s
, int zt
, int pg
, TCGv_i64 addr
, int msz
)
4236 static gen_helper_gvec_mem
* const fns
[4] = {
4237 gen_helper_sve_ld1bb_r
, gen_helper_sve_ld1hh_r
,
4238 gen_helper_sve_ld1ss_r
, gen_helper_sve_ld1dd_r
,
4240 unsigned vsz
= vec_full_reg_size(s
);
4244 /* Load the first quadword using the normal predicated load helpers. */
4245 desc
= tcg_const_i32(simd_desc(16, 16, zt
));
4246 t_pg
= tcg_temp_new_ptr();
4248 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
4249 fns
[msz
](cpu_env
, t_pg
, addr
, desc
);
4251 tcg_temp_free_ptr(t_pg
);
4252 tcg_temp_free_i32(desc
);
4254 /* Replicate that first quadword. */
4256 unsigned dofs
= vec_full_reg_offset(s
, zt
);
4257 tcg_gen_gvec_dup_mem(4, dofs
+ 16, dofs
, vsz
- 16, vsz
- 16);
4261 static bool trans_LD1RQ_zprr(DisasContext
*s
, arg_rprr_load
*a
, uint32_t insn
)
4266 if (sve_access_check(s
)) {
4267 int msz
= dtype_msz(a
->dtype
);
4268 TCGv_i64 addr
= new_tmp_a64(s
);
4269 tcg_gen_shli_i64(addr
, cpu_reg(s
, a
->rm
), msz
);
4270 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
4271 do_ldrq(s
, a
->rd
, a
->pg
, addr
, msz
);
4276 static bool trans_LD1RQ_zpri(DisasContext
*s
, arg_rpri_load
*a
, uint32_t insn
)
4278 if (sve_access_check(s
)) {
4279 TCGv_i64 addr
= new_tmp_a64(s
);
4280 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
), a
->imm
* 16);
4281 do_ldrq(s
, a
->rd
, a
->pg
, addr
, dtype_msz(a
->dtype
));
4286 /* Load and broadcast element. */
4287 static bool trans_LD1R_zpri(DisasContext
*s
, arg_rpri_load
*a
, uint32_t insn
)
4289 if (!sve_access_check(s
)) {
4293 unsigned vsz
= vec_full_reg_size(s
);
4294 unsigned psz
= pred_full_reg_size(s
);
4295 unsigned esz
= dtype_esz
[a
->dtype
];
4296 TCGLabel
*over
= gen_new_label();
4299 /* If the guarding predicate has no bits set, no load occurs. */
4301 /* Reduce the pred_esz_masks value simply to reduce the
4302 * size of the code generated here.
4304 uint64_t psz_mask
= MAKE_64BIT_MASK(0, psz
* 8);
4305 temp
= tcg_temp_new_i64();
4306 tcg_gen_ld_i64(temp
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
4307 tcg_gen_andi_i64(temp
, temp
, pred_esz_masks
[esz
] & psz_mask
);
4308 tcg_gen_brcondi_i64(TCG_COND_EQ
, temp
, 0, over
);
4309 tcg_temp_free_i64(temp
);
4311 TCGv_i32 t32
= tcg_temp_new_i32();
4312 find_last_active(s
, t32
, esz
, a
->pg
);
4313 tcg_gen_brcondi_i32(TCG_COND_LT
, t32
, 0, over
);
4314 tcg_temp_free_i32(t32
);
4317 /* Load the data. */
4318 temp
= tcg_temp_new_i64();
4319 tcg_gen_addi_i64(temp
, cpu_reg_sp(s
, a
->rn
), a
->imm
<< esz
);
4320 tcg_gen_qemu_ld_i64(temp
, temp
, get_mem_index(s
),
4321 s
->be_data
| dtype_mop
[a
->dtype
]);
4323 /* Broadcast to *all* elements. */
4324 tcg_gen_gvec_dup_i64(esz
, vec_full_reg_offset(s
, a
->rd
),
4326 tcg_temp_free_i64(temp
);
4328 /* Zero the inactive elements. */
4329 gen_set_label(over
);
4330 do_movz_zpz(s
, a
->rd
, a
->rd
, a
->pg
, esz
);
4334 static void do_st_zpa(DisasContext
*s
, int zt
, int pg
, TCGv_i64 addr
,
4335 int msz
, int esz
, int nreg
)
4337 static gen_helper_gvec_mem
* const fn_single
[4][4] = {
4338 { gen_helper_sve_st1bb_r
, gen_helper_sve_st1bh_r
,
4339 gen_helper_sve_st1bs_r
, gen_helper_sve_st1bd_r
},
4340 { NULL
, gen_helper_sve_st1hh_r
,
4341 gen_helper_sve_st1hs_r
, gen_helper_sve_st1hd_r
},
4343 gen_helper_sve_st1ss_r
, gen_helper_sve_st1sd_r
},
4344 { NULL
, NULL
, NULL
, gen_helper_sve_st1dd_r
},
4346 static gen_helper_gvec_mem
* const fn_multiple
[3][4] = {
4347 { gen_helper_sve_st2bb_r
, gen_helper_sve_st2hh_r
,
4348 gen_helper_sve_st2ss_r
, gen_helper_sve_st2dd_r
},
4349 { gen_helper_sve_st3bb_r
, gen_helper_sve_st3hh_r
,
4350 gen_helper_sve_st3ss_r
, gen_helper_sve_st3dd_r
},
4351 { gen_helper_sve_st4bb_r
, gen_helper_sve_st4hh_r
,
4352 gen_helper_sve_st4ss_r
, gen_helper_sve_st4dd_r
},
4354 gen_helper_gvec_mem
*fn
;
4358 fn
= fn_single
[msz
][esz
];
4360 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4362 fn
= fn_multiple
[nreg
- 1][msz
];
4365 do_mem_zpa(s
, zt
, pg
, addr
, fn
);
4368 static bool trans_ST_zprr(DisasContext
*s
, arg_rprr_store
*a
, uint32_t insn
)
4370 if (a
->rm
== 31 || a
->msz
> a
->esz
) {
4373 if (sve_access_check(s
)) {
4374 TCGv_i64 addr
= new_tmp_a64(s
);
4375 tcg_gen_muli_i64(addr
, cpu_reg(s
, a
->rm
), (a
->nreg
+ 1) << a
->msz
);
4376 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, a
->rn
));
4377 do_st_zpa(s
, a
->rd
, a
->pg
, addr
, a
->msz
, a
->esz
, a
->nreg
);
4382 static bool trans_ST_zpri(DisasContext
*s
, arg_rpri_store
*a
, uint32_t insn
)
4384 if (a
->msz
> a
->esz
) {
4387 if (sve_access_check(s
)) {
4388 int vsz
= vec_full_reg_size(s
);
4389 int elements
= vsz
>> a
->esz
;
4390 TCGv_i64 addr
= new_tmp_a64(s
);
4392 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, a
->rn
),
4393 (a
->imm
* elements
* (a
->nreg
+ 1)) << a
->msz
);
4394 do_st_zpa(s
, a
->rd
, a
->pg
, addr
, a
->msz
, a
->esz
, a
->nreg
);
/*
 *** SVE gather loads / scatter stores
 */
4403 static void do_mem_zpz(DisasContext
*s
, int zt
, int pg
, int zm
, int scale
,
4404 TCGv_i64 scalar
, gen_helper_gvec_mem_scatter
*fn
)
4406 unsigned vsz
= vec_full_reg_size(s
);
4407 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, scale
));
4408 TCGv_ptr t_zm
= tcg_temp_new_ptr();
4409 TCGv_ptr t_pg
= tcg_temp_new_ptr();
4410 TCGv_ptr t_zt
= tcg_temp_new_ptr();
4412 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
4413 tcg_gen_addi_ptr(t_zm
, cpu_env
, vec_full_reg_offset(s
, zm
));
4414 tcg_gen_addi_ptr(t_zt
, cpu_env
, vec_full_reg_offset(s
, zt
));
4415 fn(cpu_env
, t_zt
, t_pg
, t_zm
, scalar
, desc
);
4417 tcg_temp_free_ptr(t_zt
);
4418 tcg_temp_free_ptr(t_zm
);
4419 tcg_temp_free_ptr(t_pg
);
4420 tcg_temp_free_i32(desc
);
4423 /* Indexed by [ff][xs][u][msz]. */
4424 static gen_helper_gvec_mem_scatter
* const gather_load_fn32
[2][2][2][3] = {
4425 { { { gen_helper_sve_ldbss_zsu
,
4426 gen_helper_sve_ldhss_zsu
,
4428 { gen_helper_sve_ldbsu_zsu
,
4429 gen_helper_sve_ldhsu_zsu
,
4430 gen_helper_sve_ldssu_zsu
, } },
4431 { { gen_helper_sve_ldbss_zss
,
4432 gen_helper_sve_ldhss_zss
,
4434 { gen_helper_sve_ldbsu_zss
,
4435 gen_helper_sve_ldhsu_zss
,
4436 gen_helper_sve_ldssu_zss
, } } },
4438 { { { gen_helper_sve_ldffbss_zsu
,
4439 gen_helper_sve_ldffhss_zsu
,
4441 { gen_helper_sve_ldffbsu_zsu
,
4442 gen_helper_sve_ldffhsu_zsu
,
4443 gen_helper_sve_ldffssu_zsu
, } },
4444 { { gen_helper_sve_ldffbss_zss
,
4445 gen_helper_sve_ldffhss_zss
,
4447 { gen_helper_sve_ldffbsu_zss
,
4448 gen_helper_sve_ldffhsu_zss
,
4449 gen_helper_sve_ldffssu_zss
, } } }
4452 /* Note that we overload xs=2 to indicate 64-bit offset. */
4453 static gen_helper_gvec_mem_scatter
* const gather_load_fn64
[2][3][2][4] = {
4454 { { { gen_helper_sve_ldbds_zsu
,
4455 gen_helper_sve_ldhds_zsu
,
4456 gen_helper_sve_ldsds_zsu
,
4458 { gen_helper_sve_ldbdu_zsu
,
4459 gen_helper_sve_ldhdu_zsu
,
4460 gen_helper_sve_ldsdu_zsu
,
4461 gen_helper_sve_ldddu_zsu
, } },
4462 { { gen_helper_sve_ldbds_zss
,
4463 gen_helper_sve_ldhds_zss
,
4464 gen_helper_sve_ldsds_zss
,
4466 { gen_helper_sve_ldbdu_zss
,
4467 gen_helper_sve_ldhdu_zss
,
4468 gen_helper_sve_ldsdu_zss
,
4469 gen_helper_sve_ldddu_zss
, } },
4470 { { gen_helper_sve_ldbds_zd
,
4471 gen_helper_sve_ldhds_zd
,
4472 gen_helper_sve_ldsds_zd
,
4474 { gen_helper_sve_ldbdu_zd
,
4475 gen_helper_sve_ldhdu_zd
,
4476 gen_helper_sve_ldsdu_zd
,
4477 gen_helper_sve_ldddu_zd
, } } },
4479 { { { gen_helper_sve_ldffbds_zsu
,
4480 gen_helper_sve_ldffhds_zsu
,
4481 gen_helper_sve_ldffsds_zsu
,
4483 { gen_helper_sve_ldffbdu_zsu
,
4484 gen_helper_sve_ldffhdu_zsu
,
4485 gen_helper_sve_ldffsdu_zsu
,
4486 gen_helper_sve_ldffddu_zsu
, } },
4487 { { gen_helper_sve_ldffbds_zss
,
4488 gen_helper_sve_ldffhds_zss
,
4489 gen_helper_sve_ldffsds_zss
,
4491 { gen_helper_sve_ldffbdu_zss
,
4492 gen_helper_sve_ldffhdu_zss
,
4493 gen_helper_sve_ldffsdu_zss
,
4494 gen_helper_sve_ldffddu_zss
, } },
4495 { { gen_helper_sve_ldffbds_zd
,
4496 gen_helper_sve_ldffhds_zd
,
4497 gen_helper_sve_ldffsds_zd
,
4499 { gen_helper_sve_ldffbdu_zd
,
4500 gen_helper_sve_ldffhdu_zd
,
4501 gen_helper_sve_ldffsdu_zd
,
4502 gen_helper_sve_ldffddu_zd
, } } }
4505 static bool trans_LD1_zprz(DisasContext
*s
, arg_LD1_zprz
*a
, uint32_t insn
)
4507 gen_helper_gvec_mem_scatter
*fn
= NULL
;
4509 if (!sve_access_check(s
)) {
4515 fn
= gather_load_fn32
[a
->ff
][a
->xs
][a
->u
][a
->msz
];
4518 fn
= gather_load_fn64
[a
->ff
][a
->xs
][a
->u
][a
->msz
];
4523 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rm
, a
->scale
* a
->msz
,
4524 cpu_reg_sp(s
, a
->rn
), fn
);
4528 static bool trans_LD1_zpiz(DisasContext
*s
, arg_LD1_zpiz
*a
, uint32_t insn
)
4530 gen_helper_gvec_mem_scatter
*fn
= NULL
;
4533 if (a
->esz
< a
->msz
|| (a
->esz
== a
->msz
&& !a
->u
)) {
4536 if (!sve_access_check(s
)) {
4542 fn
= gather_load_fn32
[a
->ff
][0][a
->u
][a
->msz
];
4545 fn
= gather_load_fn64
[a
->ff
][2][a
->u
][a
->msz
];
4550 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
4551 * by loading the immediate into the scalar parameter.
4553 imm
= tcg_const_i64(a
->imm
<< a
->msz
);
4554 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rn
, 0, imm
, fn
);
4555 tcg_temp_free_i64(imm
);
4559 /* Indexed by [xs][msz]. */
4560 static gen_helper_gvec_mem_scatter
* const scatter_store_fn32
[2][3] = {
4561 { gen_helper_sve_stbs_zsu
,
4562 gen_helper_sve_sths_zsu
,
4563 gen_helper_sve_stss_zsu
, },
4564 { gen_helper_sve_stbs_zss
,
4565 gen_helper_sve_sths_zss
,
4566 gen_helper_sve_stss_zss
, },
4569 /* Note that we overload xs=2 to indicate 64-bit offset. */
4570 static gen_helper_gvec_mem_scatter
* const scatter_store_fn64
[3][4] = {
4571 { gen_helper_sve_stbd_zsu
,
4572 gen_helper_sve_sthd_zsu
,
4573 gen_helper_sve_stsd_zsu
,
4574 gen_helper_sve_stdd_zsu
, },
4575 { gen_helper_sve_stbd_zss
,
4576 gen_helper_sve_sthd_zss
,
4577 gen_helper_sve_stsd_zss
,
4578 gen_helper_sve_stdd_zss
, },
4579 { gen_helper_sve_stbd_zd
,
4580 gen_helper_sve_sthd_zd
,
4581 gen_helper_sve_stsd_zd
,
4582 gen_helper_sve_stdd_zd
, },
4585 static bool trans_ST1_zprz(DisasContext
*s
, arg_ST1_zprz
*a
, uint32_t insn
)
4587 gen_helper_gvec_mem_scatter
*fn
;
4589 if (a
->esz
< a
->msz
|| (a
->msz
== 0 && a
->scale
)) {
4592 if (!sve_access_check(s
)) {
4597 fn
= scatter_store_fn32
[a
->xs
][a
->msz
];
4600 fn
= scatter_store_fn64
[a
->xs
][a
->msz
];
4603 g_assert_not_reached();
4605 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rm
, a
->scale
* a
->msz
,
4606 cpu_reg_sp(s
, a
->rn
), fn
);
4610 static bool trans_ST1_zpiz(DisasContext
*s
, arg_ST1_zpiz
*a
, uint32_t insn
)
4612 gen_helper_gvec_mem_scatter
*fn
= NULL
;
4615 if (a
->esz
< a
->msz
) {
4618 if (!sve_access_check(s
)) {
4624 fn
= scatter_store_fn32
[0][a
->msz
];
4627 fn
= scatter_store_fn64
[2][a
->msz
];
4632 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
4633 * by loading the immediate into the scalar parameter.
4635 imm
= tcg_const_i64(a
->imm
<< a
->msz
);
4636 do_mem_zpz(s
, a
->rd
, a
->pg
, a
->rn
, 0, imm
, fn
);
4637 tcg_temp_free_i64(imm
);
4645 static bool trans_PRF(DisasContext
*s
, arg_PRF
*a
, uint32_t insn
)
4647 /* Prefetch is a nop within QEMU. */
4648 sve_access_check(s
);
4652 static bool trans_PRF_rr(DisasContext
*s
, arg_PRF_rr
*a
, uint32_t insn
)
4657 /* Prefetch is a nop within QEMU. */
4658 sve_access_check(s
);