/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "exec/exec-all.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "trace-tcg.h"
#include "translate-a64.h"
/*
 * Helpers for extracting complex instruction fields.
 */
40 /* See e.g. ASR (immediate, predicated).
41 * Returns -1 for unallocated encoding; diagnose later.
43 static int tszimm_esz(int x
)
45 x
>>= 3; /* discard imm3 */
49 static int tszimm_shr(int x
)
51 return (16 << tszimm_esz(x
)) - x
;
54 /* See e.g. LSL (immediate, predicated). */
55 static int tszimm_shl(int x
)
57 return x
- (8 << tszimm_esz(x
));
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */
70 /* Return the offset info CPUARMState of the predicate vector register Pn.
71 * Note for this purpose, FFR is P16.
73 static inline int pred_full_reg_offset(DisasContext
*s
, int regno
)
75 return offsetof(CPUARMState
, vfp
.pregs
[regno
]);
78 /* Return the byte size of the whole predicate register, VL / 64. */
79 static inline int pred_full_reg_size(DisasContext
*s
)
81 return s
->sve_len
>> 3;
84 /* Round up the size of a register to a size allowed by
85 * the tcg vector infrastructure. Any operation which uses this
86 * size may assume that the bits above pred_full_reg_size are zero,
87 * and must leave them the same way.
89 * Note that this is not needed for the vector registers as they
90 * are always properly sized for tcg vectors.
92 static int size_for_gvec(int size
)
97 return QEMU_ALIGN_UP(size
, 16);
101 static int pred_gvec_reg_size(DisasContext
*s
)
103 return size_for_gvec(pred_full_reg_size(s
));
106 /* Invoke a vector expander on two Zregs. */
107 static bool do_vector2_z(DisasContext
*s
, GVecGen2Fn
*gvec_fn
,
108 int esz
, int rd
, int rn
)
110 if (sve_access_check(s
)) {
111 unsigned vsz
= vec_full_reg_size(s
);
112 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
113 vec_full_reg_offset(s
, rn
), vsz
, vsz
);
118 /* Invoke a vector expander on three Zregs. */
119 static bool do_vector3_z(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
120 int esz
, int rd
, int rn
, int rm
)
122 if (sve_access_check(s
)) {
123 unsigned vsz
= vec_full_reg_size(s
);
124 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
125 vec_full_reg_offset(s
, rn
),
126 vec_full_reg_offset(s
, rm
), vsz
, vsz
);
131 /* Invoke a vector move on two Zregs. */
132 static bool do_mov_z(DisasContext
*s
, int rd
, int rn
)
134 return do_vector2_z(s
, tcg_gen_gvec_mov
, 0, rd
, rn
);
137 /* Initialize a Zreg with replications of a 64-bit immediate. */
138 static void do_dupi_z(DisasContext
*s
, int rd
, uint64_t word
)
140 unsigned vsz
= vec_full_reg_size(s
);
141 tcg_gen_gvec_dup64i(vec_full_reg_offset(s
, rd
), vsz
, vsz
, word
);
144 /* Invoke a vector expander on two Pregs. */
145 static bool do_vector2_p(DisasContext
*s
, GVecGen2Fn
*gvec_fn
,
146 int esz
, int rd
, int rn
)
148 if (sve_access_check(s
)) {
149 unsigned psz
= pred_gvec_reg_size(s
);
150 gvec_fn(esz
, pred_full_reg_offset(s
, rd
),
151 pred_full_reg_offset(s
, rn
), psz
, psz
);
156 /* Invoke a vector expander on three Pregs. */
157 static bool do_vector3_p(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
158 int esz
, int rd
, int rn
, int rm
)
160 if (sve_access_check(s
)) {
161 unsigned psz
= pred_gvec_reg_size(s
);
162 gvec_fn(esz
, pred_full_reg_offset(s
, rd
),
163 pred_full_reg_offset(s
, rn
),
164 pred_full_reg_offset(s
, rm
), psz
, psz
);
169 /* Invoke a vector operation on four Pregs. */
170 static bool do_vecop4_p(DisasContext
*s
, const GVecGen4
*gvec_op
,
171 int rd
, int rn
, int rm
, int rg
)
173 if (sve_access_check(s
)) {
174 unsigned psz
= pred_gvec_reg_size(s
);
175 tcg_gen_gvec_4(pred_full_reg_offset(s
, rd
),
176 pred_full_reg_offset(s
, rn
),
177 pred_full_reg_offset(s
, rm
),
178 pred_full_reg_offset(s
, rg
),
184 /* Invoke a vector move on two Pregs. */
185 static bool do_mov_p(DisasContext
*s
, int rd
, int rn
)
187 return do_vector2_p(s
, tcg_gen_gvec_mov
, 0, rd
, rn
);
190 /* Set the cpu flags as per a return from an SVE helper. */
191 static void do_pred_flags(TCGv_i32 t
)
193 tcg_gen_mov_i32(cpu_NF
, t
);
194 tcg_gen_andi_i32(cpu_ZF
, t
, 2);
195 tcg_gen_andi_i32(cpu_CF
, t
, 1);
196 tcg_gen_movi_i32(cpu_VF
, 0);
199 /* Subroutines computing the ARM PredTest psuedofunction. */
200 static void do_predtest1(TCGv_i64 d
, TCGv_i64 g
)
202 TCGv_i32 t
= tcg_temp_new_i32();
204 gen_helper_sve_predtest1(t
, d
, g
);
206 tcg_temp_free_i32(t
);
209 static void do_predtest(DisasContext
*s
, int dofs
, int gofs
, int words
)
211 TCGv_ptr dptr
= tcg_temp_new_ptr();
212 TCGv_ptr gptr
= tcg_temp_new_ptr();
215 tcg_gen_addi_ptr(dptr
, cpu_env
, dofs
);
216 tcg_gen_addi_ptr(gptr
, cpu_env
, gofs
);
217 t
= tcg_const_i32(words
);
219 gen_helper_sve_predtest(t
, dptr
, gptr
, t
);
220 tcg_temp_free_ptr(dptr
);
221 tcg_temp_free_ptr(gptr
);
224 tcg_temp_free_i32(t
);
227 /* For each element size, the bits within a predicate word that are active. */
228 const uint64_t pred_esz_masks
[4] = {
229 0xffffffffffffffffull
, 0x5555555555555555ull
,
230 0x1111111111111111ull
, 0x0101010101010101ull
/*
 *** SVE Logical - Unpredicated Group
 */
237 static bool trans_AND_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
239 return do_vector3_z(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->rm
);
242 static bool trans_ORR_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
244 if (a
->rn
== a
->rm
) { /* MOV */
245 return do_mov_z(s
, a
->rd
, a
->rn
);
247 return do_vector3_z(s
, tcg_gen_gvec_or
, 0, a
->rd
, a
->rn
, a
->rm
);
251 static bool trans_EOR_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
253 return do_vector3_z(s
, tcg_gen_gvec_xor
, 0, a
->rd
, a
->rn
, a
->rm
);
256 static bool trans_BIC_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
258 return do_vector3_z(s
, tcg_gen_gvec_andc
, 0, a
->rd
, a
->rn
, a
->rm
);
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */
265 static bool trans_ADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
267 return do_vector3_z(s
, tcg_gen_gvec_add
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
270 static bool trans_SUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
272 return do_vector3_z(s
, tcg_gen_gvec_sub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
275 static bool trans_SQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
277 return do_vector3_z(s
, tcg_gen_gvec_ssadd
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
280 static bool trans_SQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
282 return do_vector3_z(s
, tcg_gen_gvec_sssub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
285 static bool trans_UQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
287 return do_vector3_z(s
, tcg_gen_gvec_usadd
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
290 static bool trans_UQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
292 return do_vector3_z(s
, tcg_gen_gvec_ussub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */
299 static bool do_zpzz_ool(DisasContext
*s
, arg_rprr_esz
*a
, gen_helper_gvec_4
*fn
)
301 unsigned vsz
= vec_full_reg_size(s
);
305 if (sve_access_check(s
)) {
306 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, a
->rd
),
307 vec_full_reg_offset(s
, a
->rn
),
308 vec_full_reg_offset(s
, a
->rm
),
309 pred_full_reg_offset(s
, a
->pg
),
315 #define DO_ZPZZ(NAME, name) \
316 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
319 static gen_helper_gvec_4 * const fns[4] = { \
320 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
321 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
323 return do_zpzz_ool(s, a, fns[a->esz]); \
342 DO_ZPZZ(SMULH
, smulh
)
343 DO_ZPZZ(UMULH
, umulh
)
349 static bool trans_SDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
351 static gen_helper_gvec_4
* const fns
[4] = {
352 NULL
, NULL
, gen_helper_sve_sdiv_zpzz_s
, gen_helper_sve_sdiv_zpzz_d
354 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
357 static bool trans_UDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
359 static gen_helper_gvec_4
* const fns
[4] = {
360 NULL
, NULL
, gen_helper_sve_udiv_zpzz_s
, gen_helper_sve_udiv_zpzz_d
362 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */
371 static bool do_zpz_ool(DisasContext
*s
, arg_rpr_esz
*a
, gen_helper_gvec_3
*fn
)
376 if (sve_access_check(s
)) {
377 unsigned vsz
= vec_full_reg_size(s
);
378 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
379 vec_full_reg_offset(s
, a
->rn
),
380 pred_full_reg_offset(s
, a
->pg
),
386 #define DO_ZPZ(NAME, name) \
387 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
389 static gen_helper_gvec_3 * const fns[4] = { \
390 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
391 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
393 return do_zpz_ool(s, a, fns[a->esz]); \
398 DO_ZPZ(CNT_zpz
, cnt_zpz
)
400 DO_ZPZ(NOT_zpz
, not_zpz
)
404 static bool trans_FABS(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
406 static gen_helper_gvec_3
* const fns
[4] = {
408 gen_helper_sve_fabs_h
,
409 gen_helper_sve_fabs_s
,
410 gen_helper_sve_fabs_d
412 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
415 static bool trans_FNEG(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
417 static gen_helper_gvec_3
* const fns
[4] = {
419 gen_helper_sve_fneg_h
,
420 gen_helper_sve_fneg_s
,
421 gen_helper_sve_fneg_d
423 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
426 static bool trans_SXTB(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
428 static gen_helper_gvec_3
* const fns
[4] = {
430 gen_helper_sve_sxtb_h
,
431 gen_helper_sve_sxtb_s
,
432 gen_helper_sve_sxtb_d
434 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
437 static bool trans_UXTB(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
439 static gen_helper_gvec_3
* const fns
[4] = {
441 gen_helper_sve_uxtb_h
,
442 gen_helper_sve_uxtb_s
,
443 gen_helper_sve_uxtb_d
445 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
448 static bool trans_SXTH(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
450 static gen_helper_gvec_3
* const fns
[4] = {
452 gen_helper_sve_sxth_s
,
453 gen_helper_sve_sxth_d
455 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
458 static bool trans_UXTH(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
460 static gen_helper_gvec_3
* const fns
[4] = {
462 gen_helper_sve_uxth_s
,
463 gen_helper_sve_uxth_d
465 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
468 static bool trans_SXTW(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
470 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_sxtw_d
: NULL
);
473 static bool trans_UXTW(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
475 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_uxtw_d
: NULL
);
/*
 *** SVE Integer Reduction Group
 */
484 typedef void gen_helper_gvec_reduc(TCGv_i64
, TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
485 static bool do_vpz_ool(DisasContext
*s
, arg_rpr_esz
*a
,
486 gen_helper_gvec_reduc
*fn
)
488 unsigned vsz
= vec_full_reg_size(s
);
496 if (!sve_access_check(s
)) {
500 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
501 temp
= tcg_temp_new_i64();
502 t_zn
= tcg_temp_new_ptr();
503 t_pg
= tcg_temp_new_ptr();
505 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
506 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
507 fn(temp
, t_zn
, t_pg
, desc
);
508 tcg_temp_free_ptr(t_zn
);
509 tcg_temp_free_ptr(t_pg
);
510 tcg_temp_free_i32(desc
);
512 write_fp_dreg(s
, a
->rd
, temp
);
513 tcg_temp_free_i64(temp
);
517 #define DO_VPZ(NAME, name) \
518 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
520 static gen_helper_gvec_reduc * const fns[4] = { \
521 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
522 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
524 return do_vpz_ool(s, a, fns[a->esz]); \
537 static bool trans_SADDV(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
539 static gen_helper_gvec_reduc
* const fns
[4] = {
540 gen_helper_sve_saddv_b
, gen_helper_sve_saddv_h
,
541 gen_helper_sve_saddv_s
, NULL
543 return do_vpz_ool(s
, a
, fns
[a
->esz
]);
/*
 *** SVE Shift by Immediate - Predicated Group
 */
552 /* Store zero into every active element of Zd. We will use this for two
553 * and three-operand predicated instructions for which logic dictates a
556 static bool do_clr_zp(DisasContext
*s
, int rd
, int pg
, int esz
)
558 static gen_helper_gvec_2
* const fns
[4] = {
559 gen_helper_sve_clr_b
, gen_helper_sve_clr_h
,
560 gen_helper_sve_clr_s
, gen_helper_sve_clr_d
,
562 if (sve_access_check(s
)) {
563 unsigned vsz
= vec_full_reg_size(s
);
564 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, rd
),
565 pred_full_reg_offset(s
, pg
),
566 vsz
, vsz
, 0, fns
[esz
]);
571 static bool do_zpzi_ool(DisasContext
*s
, arg_rpri_esz
*a
,
572 gen_helper_gvec_3
*fn
)
574 if (sve_access_check(s
)) {
575 unsigned vsz
= vec_full_reg_size(s
);
576 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
577 vec_full_reg_offset(s
, a
->rn
),
578 pred_full_reg_offset(s
, a
->pg
),
579 vsz
, vsz
, a
->imm
, fn
);
584 static bool trans_ASR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
586 static gen_helper_gvec_3
* const fns
[4] = {
587 gen_helper_sve_asr_zpzi_b
, gen_helper_sve_asr_zpzi_h
,
588 gen_helper_sve_asr_zpzi_s
, gen_helper_sve_asr_zpzi_d
,
591 /* Invalid tsz encoding -- see tszimm_esz. */
594 /* Shift by element size is architecturally valid. For
595 arithmetic right-shift, it's the same as by one less. */
596 a
->imm
= MIN(a
->imm
, (8 << a
->esz
) - 1);
597 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
600 static bool trans_LSR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
602 static gen_helper_gvec_3
* const fns
[4] = {
603 gen_helper_sve_lsr_zpzi_b
, gen_helper_sve_lsr_zpzi_h
,
604 gen_helper_sve_lsr_zpzi_s
, gen_helper_sve_lsr_zpzi_d
,
609 /* Shift by element size is architecturally valid.
610 For logical shifts, it is a zeroing operation. */
611 if (a
->imm
>= (8 << a
->esz
)) {
612 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
614 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
618 static bool trans_LSL_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
620 static gen_helper_gvec_3
* const fns
[4] = {
621 gen_helper_sve_lsl_zpzi_b
, gen_helper_sve_lsl_zpzi_h
,
622 gen_helper_sve_lsl_zpzi_s
, gen_helper_sve_lsl_zpzi_d
,
627 /* Shift by element size is architecturally valid.
628 For logical shifts, it is a zeroing operation. */
629 if (a
->imm
>= (8 << a
->esz
)) {
630 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
632 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
636 static bool trans_ASRD(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
638 static gen_helper_gvec_3
* const fns
[4] = {
639 gen_helper_sve_asrd_b
, gen_helper_sve_asrd_h
,
640 gen_helper_sve_asrd_s
, gen_helper_sve_asrd_d
,
645 /* Shift by element size is architecturally valid. For arithmetic
646 right shift for division, it is a zeroing operation. */
647 if (a
->imm
>= (8 << a
->esz
)) {
648 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
650 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
/*
 *** SVE Bitwise Shift - Predicated Group
 */
658 #define DO_ZPZW(NAME, name) \
659 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
662 static gen_helper_gvec_4 * const fns[3] = { \
663 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
664 gen_helper_sve_##name##_zpzw_s, \
666 if (a->esz < 0 || a->esz >= 3) { \
669 return do_zpzz_ool(s, a, fns[a->esz]); \
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */
682 static bool do_shift_imm(DisasContext
*s
, arg_rri_esz
*a
, bool asr
,
683 void (*gvec_fn
)(unsigned, uint32_t, uint32_t,
684 int64_t, uint32_t, uint32_t))
687 /* Invalid tsz encoding -- see tszimm_esz. */
690 if (sve_access_check(s
)) {
691 unsigned vsz
= vec_full_reg_size(s
);
692 /* Shift by element size is architecturally valid. For
693 arithmetic right-shift, it's the same as by one less.
694 Otherwise it is a zeroing operation. */
695 if (a
->imm
>= 8 << a
->esz
) {
697 a
->imm
= (8 << a
->esz
) - 1;
699 do_dupi_z(s
, a
->rd
, 0);
703 gvec_fn(a
->esz
, vec_full_reg_offset(s
, a
->rd
),
704 vec_full_reg_offset(s
, a
->rn
), a
->imm
, vsz
, vsz
);
709 static bool trans_ASR_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
711 return do_shift_imm(s
, a
, true, tcg_gen_gvec_sari
);
714 static bool trans_LSR_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
716 return do_shift_imm(s
, a
, false, tcg_gen_gvec_shri
);
719 static bool trans_LSL_zzi(DisasContext
*s
, arg_rri_esz
*a
, uint32_t insn
)
721 return do_shift_imm(s
, a
, false, tcg_gen_gvec_shli
);
724 static bool do_zzw_ool(DisasContext
*s
, arg_rrr_esz
*a
, gen_helper_gvec_3
*fn
)
729 if (sve_access_check(s
)) {
730 unsigned vsz
= vec_full_reg_size(s
);
731 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
732 vec_full_reg_offset(s
, a
->rn
),
733 vec_full_reg_offset(s
, a
->rm
),
739 #define DO_ZZW(NAME, name) \
740 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
743 static gen_helper_gvec_3 * const fns[4] = { \
744 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
745 gen_helper_sve_##name##_zzw_s, NULL \
747 return do_zzw_ool(s, a, fns[a->esz]); \
/*
 *** SVE Integer Multiply-Add Group
 */
760 static bool do_zpzzz_ool(DisasContext
*s
, arg_rprrr_esz
*a
,
761 gen_helper_gvec_5
*fn
)
763 if (sve_access_check(s
)) {
764 unsigned vsz
= vec_full_reg_size(s
);
765 tcg_gen_gvec_5_ool(vec_full_reg_offset(s
, a
->rd
),
766 vec_full_reg_offset(s
, a
->ra
),
767 vec_full_reg_offset(s
, a
->rn
),
768 vec_full_reg_offset(s
, a
->rm
),
769 pred_full_reg_offset(s
, a
->pg
),
775 #define DO_ZPZZZ(NAME, name) \
776 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
778 static gen_helper_gvec_5 * const fns[4] = { \
779 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
780 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
782 return do_zpzzz_ool(s, a, fns[a->esz]); \
/*
 *** SVE Index Generation Group
 */
794 static void do_index(DisasContext
*s
, int esz
, int rd
,
795 TCGv_i64 start
, TCGv_i64 incr
)
797 unsigned vsz
= vec_full_reg_size(s
);
798 TCGv_i32 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
799 TCGv_ptr t_zd
= tcg_temp_new_ptr();
801 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, rd
));
803 gen_helper_sve_index_d(t_zd
, start
, incr
, desc
);
805 typedef void index_fn(TCGv_ptr
, TCGv_i32
, TCGv_i32
, TCGv_i32
);
806 static index_fn
* const fns
[3] = {
807 gen_helper_sve_index_b
,
808 gen_helper_sve_index_h
,
809 gen_helper_sve_index_s
,
811 TCGv_i32 s32
= tcg_temp_new_i32();
812 TCGv_i32 i32
= tcg_temp_new_i32();
814 tcg_gen_extrl_i64_i32(s32
, start
);
815 tcg_gen_extrl_i64_i32(i32
, incr
);
816 fns
[esz
](t_zd
, s32
, i32
, desc
);
818 tcg_temp_free_i32(s32
);
819 tcg_temp_free_i32(i32
);
821 tcg_temp_free_ptr(t_zd
);
822 tcg_temp_free_i32(desc
);
825 static bool trans_INDEX_ii(DisasContext
*s
, arg_INDEX_ii
*a
, uint32_t insn
)
827 if (sve_access_check(s
)) {
828 TCGv_i64 start
= tcg_const_i64(a
->imm1
);
829 TCGv_i64 incr
= tcg_const_i64(a
->imm2
);
830 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
831 tcg_temp_free_i64(start
);
832 tcg_temp_free_i64(incr
);
837 static bool trans_INDEX_ir(DisasContext
*s
, arg_INDEX_ir
*a
, uint32_t insn
)
839 if (sve_access_check(s
)) {
840 TCGv_i64 start
= tcg_const_i64(a
->imm
);
841 TCGv_i64 incr
= cpu_reg(s
, a
->rm
);
842 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
843 tcg_temp_free_i64(start
);
848 static bool trans_INDEX_ri(DisasContext
*s
, arg_INDEX_ri
*a
, uint32_t insn
)
850 if (sve_access_check(s
)) {
851 TCGv_i64 start
= cpu_reg(s
, a
->rn
);
852 TCGv_i64 incr
= tcg_const_i64(a
->imm
);
853 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
854 tcg_temp_free_i64(incr
);
859 static bool trans_INDEX_rr(DisasContext
*s
, arg_INDEX_rr
*a
, uint32_t insn
)
861 if (sve_access_check(s
)) {
862 TCGv_i64 start
= cpu_reg(s
, a
->rn
);
863 TCGv_i64 incr
= cpu_reg(s
, a
->rm
);
864 do_index(s
, a
->esz
, a
->rd
, start
, incr
);
/*
 *** SVE Stack Allocation Group
 */
873 static bool trans_ADDVL(DisasContext
*s
, arg_ADDVL
*a
, uint32_t insn
)
875 TCGv_i64 rd
= cpu_reg_sp(s
, a
->rd
);
876 TCGv_i64 rn
= cpu_reg_sp(s
, a
->rn
);
877 tcg_gen_addi_i64(rd
, rn
, a
->imm
* vec_full_reg_size(s
));
881 static bool trans_ADDPL(DisasContext
*s
, arg_ADDPL
*a
, uint32_t insn
)
883 TCGv_i64 rd
= cpu_reg_sp(s
, a
->rd
);
884 TCGv_i64 rn
= cpu_reg_sp(s
, a
->rn
);
885 tcg_gen_addi_i64(rd
, rn
, a
->imm
* pred_full_reg_size(s
));
889 static bool trans_RDVL(DisasContext
*s
, arg_RDVL
*a
, uint32_t insn
)
891 TCGv_i64 reg
= cpu_reg(s
, a
->rd
);
892 tcg_gen_movi_i64(reg
, a
->imm
* vec_full_reg_size(s
));
/*
 *** SVE Predicate Logical Operations Group
 */
900 static bool do_pppp_flags(DisasContext
*s
, arg_rprr_s
*a
,
901 const GVecGen4
*gvec_op
)
903 if (!sve_access_check(s
)) {
907 unsigned psz
= pred_gvec_reg_size(s
);
908 int dofs
= pred_full_reg_offset(s
, a
->rd
);
909 int nofs
= pred_full_reg_offset(s
, a
->rn
);
910 int mofs
= pred_full_reg_offset(s
, a
->rm
);
911 int gofs
= pred_full_reg_offset(s
, a
->pg
);
914 /* Do the operation and the flags generation in temps. */
915 TCGv_i64 pd
= tcg_temp_new_i64();
916 TCGv_i64 pn
= tcg_temp_new_i64();
917 TCGv_i64 pm
= tcg_temp_new_i64();
918 TCGv_i64 pg
= tcg_temp_new_i64();
920 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
921 tcg_gen_ld_i64(pm
, cpu_env
, mofs
);
922 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
924 gvec_op
->fni8(pd
, pn
, pm
, pg
);
925 tcg_gen_st_i64(pd
, cpu_env
, dofs
);
927 do_predtest1(pd
, pg
);
929 tcg_temp_free_i64(pd
);
930 tcg_temp_free_i64(pn
);
931 tcg_temp_free_i64(pm
);
932 tcg_temp_free_i64(pg
);
934 /* The operation and flags generation is large. The computation
935 * of the flags depends on the original contents of the guarding
936 * predicate. If the destination overwrites the guarding predicate,
937 * then the easiest way to get this right is to save a copy.
940 if (a
->rd
== a
->pg
) {
941 tofs
= offsetof(CPUARMState
, vfp
.preg_tmp
);
942 tcg_gen_gvec_mov(0, tofs
, gofs
, psz
, psz
);
945 tcg_gen_gvec_4(dofs
, nofs
, mofs
, gofs
, psz
, psz
, gvec_op
);
946 do_predtest(s
, dofs
, tofs
, psz
/ 8);
951 static void gen_and_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
953 tcg_gen_and_i64(pd
, pn
, pm
);
954 tcg_gen_and_i64(pd
, pd
, pg
);
957 static void gen_and_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
958 TCGv_vec pm
, TCGv_vec pg
)
960 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
961 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
964 static bool trans_AND_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
966 static const GVecGen4 op
= {
967 .fni8
= gen_and_pg_i64
,
968 .fniv
= gen_and_pg_vec
,
969 .fno
= gen_helper_sve_and_pppp
,
970 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
973 return do_pppp_flags(s
, a
, &op
);
974 } else if (a
->rn
== a
->rm
) {
975 if (a
->pg
== a
->rn
) {
976 return do_mov_p(s
, a
->rd
, a
->rn
);
978 return do_vector3_p(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->pg
);
980 } else if (a
->pg
== a
->rn
|| a
->pg
== a
->rm
) {
981 return do_vector3_p(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->rm
);
983 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
987 static void gen_bic_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
989 tcg_gen_andc_i64(pd
, pn
, pm
);
990 tcg_gen_and_i64(pd
, pd
, pg
);
993 static void gen_bic_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
994 TCGv_vec pm
, TCGv_vec pg
)
996 tcg_gen_andc_vec(vece
, pd
, pn
, pm
);
997 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1000 static bool trans_BIC_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1002 static const GVecGen4 op
= {
1003 .fni8
= gen_bic_pg_i64
,
1004 .fniv
= gen_bic_pg_vec
,
1005 .fno
= gen_helper_sve_bic_pppp
,
1006 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1009 return do_pppp_flags(s
, a
, &op
);
1010 } else if (a
->pg
== a
->rn
) {
1011 return do_vector3_p(s
, tcg_gen_gvec_andc
, 0, a
->rd
, a
->rn
, a
->rm
);
1013 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1017 static void gen_eor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1019 tcg_gen_xor_i64(pd
, pn
, pm
);
1020 tcg_gen_and_i64(pd
, pd
, pg
);
1023 static void gen_eor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1024 TCGv_vec pm
, TCGv_vec pg
)
1026 tcg_gen_xor_vec(vece
, pd
, pn
, pm
);
1027 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1030 static bool trans_EOR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1032 static const GVecGen4 op
= {
1033 .fni8
= gen_eor_pg_i64
,
1034 .fniv
= gen_eor_pg_vec
,
1035 .fno
= gen_helper_sve_eor_pppp
,
1036 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1039 return do_pppp_flags(s
, a
, &op
);
1041 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1045 static void gen_sel_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1047 tcg_gen_and_i64(pn
, pn
, pg
);
1048 tcg_gen_andc_i64(pm
, pm
, pg
);
1049 tcg_gen_or_i64(pd
, pn
, pm
);
1052 static void gen_sel_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1053 TCGv_vec pm
, TCGv_vec pg
)
1055 tcg_gen_and_vec(vece
, pn
, pn
, pg
);
1056 tcg_gen_andc_vec(vece
, pm
, pm
, pg
);
1057 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
1060 static bool trans_SEL_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1062 static const GVecGen4 op
= {
1063 .fni8
= gen_sel_pg_i64
,
1064 .fniv
= gen_sel_pg_vec
,
1065 .fno
= gen_helper_sve_sel_pppp
,
1066 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1071 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1075 static void gen_orr_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1077 tcg_gen_or_i64(pd
, pn
, pm
);
1078 tcg_gen_and_i64(pd
, pd
, pg
);
1081 static void gen_orr_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1082 TCGv_vec pm
, TCGv_vec pg
)
1084 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
1085 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1088 static bool trans_ORR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1090 static const GVecGen4 op
= {
1091 .fni8
= gen_orr_pg_i64
,
1092 .fniv
= gen_orr_pg_vec
,
1093 .fno
= gen_helper_sve_orr_pppp
,
1094 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1097 return do_pppp_flags(s
, a
, &op
);
1098 } else if (a
->pg
== a
->rn
&& a
->rn
== a
->rm
) {
1099 return do_mov_p(s
, a
->rd
, a
->rn
);
1101 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1105 static void gen_orn_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1107 tcg_gen_orc_i64(pd
, pn
, pm
);
1108 tcg_gen_and_i64(pd
, pd
, pg
);
1111 static void gen_orn_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1112 TCGv_vec pm
, TCGv_vec pg
)
1114 tcg_gen_orc_vec(vece
, pd
, pn
, pm
);
1115 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
1118 static bool trans_ORN_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1120 static const GVecGen4 op
= {
1121 .fni8
= gen_orn_pg_i64
,
1122 .fniv
= gen_orn_pg_vec
,
1123 .fno
= gen_helper_sve_orn_pppp
,
1124 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1127 return do_pppp_flags(s
, a
, &op
);
1129 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1133 static void gen_nor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1135 tcg_gen_or_i64(pd
, pn
, pm
);
1136 tcg_gen_andc_i64(pd
, pg
, pd
);
1139 static void gen_nor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1140 TCGv_vec pm
, TCGv_vec pg
)
1142 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
1143 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
1146 static bool trans_NOR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1148 static const GVecGen4 op
= {
1149 .fni8
= gen_nor_pg_i64
,
1150 .fniv
= gen_nor_pg_vec
,
1151 .fno
= gen_helper_sve_nor_pppp
,
1152 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1155 return do_pppp_flags(s
, a
, &op
);
1157 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
1161 static void gen_nand_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
1163 tcg_gen_and_i64(pd
, pn
, pm
);
1164 tcg_gen_andc_i64(pd
, pg
, pd
);
1167 static void gen_nand_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
1168 TCGv_vec pm
, TCGv_vec pg
)
1170 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
1171 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
1174 static bool trans_NAND_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
1176 static const GVecGen4 op
= {
1177 .fni8
= gen_nand_pg_i64
,
1178 .fniv
= gen_nand_pg_vec
,
1179 .fno
= gen_helper_sve_nand_pppp
,
1180 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1183 return do_pppp_flags(s
, a
, &op
);
1185 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
/*
 *** SVE Predicate Misc Group
 */
1193 static bool trans_PTEST(DisasContext
*s
, arg_PTEST
*a
, uint32_t insn
)
1195 if (sve_access_check(s
)) {
1196 int nofs
= pred_full_reg_offset(s
, a
->rn
);
1197 int gofs
= pred_full_reg_offset(s
, a
->pg
);
1198 int words
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1201 TCGv_i64 pn
= tcg_temp_new_i64();
1202 TCGv_i64 pg
= tcg_temp_new_i64();
1204 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
1205 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
1206 do_predtest1(pn
, pg
);
1208 tcg_temp_free_i64(pn
);
1209 tcg_temp_free_i64(pg
);
1211 do_predtest(s
, nofs
, gofs
, words
);
/* See the ARM pseudocode DecodePredCount. */
1218 static unsigned decode_pred_count(unsigned fullsz
, int pattern
, int esz
)
1220 unsigned elements
= fullsz
>> esz
;
1224 case 0x0: /* POW2 */
1225 return pow2floor(elements
);
1236 case 0x9: /* VL16 */
1237 case 0xa: /* VL32 */
1238 case 0xb: /* VL64 */
1239 case 0xc: /* VL128 */
1240 case 0xd: /* VL256 */
1241 bound
= 16 << (pattern
- 9);
1243 case 0x1d: /* MUL4 */
1244 return elements
- elements
% 4;
1245 case 0x1e: /* MUL3 */
1246 return elements
- elements
% 3;
1247 case 0x1f: /* ALL */
1249 default: /* #uimm5 */
1252 return elements
>= bound
? bound
: 0;
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        /* No active elements: the whole register is zeroed.  */
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Trim the final, partial 64-bit word to the active bits.  */
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The entire predicate register fits in one 64-bit store.  */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* All stored words are identical, so a vector-wide dup can be
         * used when the gvec-rounded size matches the bytes to be set.
         */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            /* Rounding produced one extra word; zero it afterward.  */
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    /* Fall back to storing one 64-bit unit at a time.
     * Convert element counts to byte counts for the offsets below.
     */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        /* Store the trimmed final word of the active portion.  */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        /* Zero any words beyond the active elements.  */
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES: set NZCV as for a predicate test of the result.  */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    /* PTRUE (and PTRUES, when a->s is set): initialize Pd per pattern.  */
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}
static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}
static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}
1358 static bool trans_RDFFR_p(DisasContext
*s
, arg_RDFFR_p
*a
, uint32_t insn
)
1360 /* The path through do_pppp_flags is complicated enough to want to avoid
1361 * duplication. Frob the arguments into the form of a predicated AND.
1363 arg_rprr_s alt_a
= {
1364 .rd
= a
->rd
, .pg
= a
->pg
, .s
= a
->s
,
1365 .rn
= FFR_PRED_NUM
, .rm
= FFR_PRED_NUM
,
1367 return trans_AND_pppp(s
, &alt_a
, insn
);
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    /* Unpredicated RDFFR: a plain predicate-register copy from FFR.  */
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    /* WRFFR: a plain predicate-register copy into FFR.  */
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
/* Common expansion for PFIRST and PNEXT: invoke GEN_FN (an out-of-line
 * helper) on Pd and Pg, then update NZCV from the helper's return value.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    /* Pack the 64-bit word count plus the element size into the
     * simd descriptor passed to the helper.
     */
    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* The helper both modifies Pd and returns the predtest flags in t.  */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    /* PFIRST: set the first active element of Pd within Pg.  */
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}
static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    /* PNEXT: advance Pd to the next active element within Pg.  */
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1420 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    /* Number of 64-bit loads, plus one load per bit of the remainder.  */
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Few enough parts: fully unroll the aligned loads.  */
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        /* Emit a TCG-level loop over the aligned portion.  */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* A single power-of-two-sized little-endian load.  */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte load plus a 2-byte load, merged.  */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    /* LDR (vector): unpredicated load of a whole Z register from
     * Rn + imm * VL, where VL is the full vector size in bytes.
     */
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}
1522 static bool trans_LDR_pri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
1524 if (sve_access_check(s
)) {
1525 int size
= pred_full_reg_size(s
);
1526 int off
= pred_full_reg_offset(s
, a
->rd
);
1527 do_ldr(s
, off
, size
, a
->rn
, a
->imm
* size
);