/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
22 #include "exec/exec-all.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
37 * Helpers for extracting complex instruction fields.
40 /* See e.g. ASR (immediate, predicated).
41 * Returns -1 for unallocated encoding; diagnose later.
/*
 * Helpers for extracting complex instruction fields.
 *
 * See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

/* Decode the shift amount for a right shift from the tsz:imm3 field. */
static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
61 * Include the generated decoder.
64 #include "decode-sve.inc.c"
67 * Implement all of the translator functions referenced by the decoder.
70 /* Return the offset info CPUARMState of the predicate vector register Pn.
71 * Note for this purpose, FFR is P16.
73 static inline int pred_full_reg_offset(DisasContext
*s
, int regno
)
75 return offsetof(CPUARMState
, vfp
.pregs
[regno
]);
78 /* Return the byte size of the whole predicate register, VL / 64. */
79 static inline int pred_full_reg_size(DisasContext
*s
)
81 return s
->sve_len
>> 3;
84 /* Round up the size of a register to a size allowed by
85 * the tcg vector infrastructure. Any operation which uses this
86 * size may assume that the bits above pred_full_reg_size are zero,
87 * and must leave them the same way.
89 * Note that this is not needed for the vector registers as they
90 * are always properly sized for tcg vectors.
92 static int size_for_gvec(int size
)
97 return QEMU_ALIGN_UP(size
, 16);
101 static int pred_gvec_reg_size(DisasContext
*s
)
103 return size_for_gvec(pred_full_reg_size(s
));
106 /* Invoke a vector expander on two Zregs. */
107 static bool do_vector2_z(DisasContext
*s
, GVecGen2Fn
*gvec_fn
,
108 int esz
, int rd
, int rn
)
110 if (sve_access_check(s
)) {
111 unsigned vsz
= vec_full_reg_size(s
);
112 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
113 vec_full_reg_offset(s
, rn
), vsz
, vsz
);
118 /* Invoke a vector expander on three Zregs. */
119 static bool do_vector3_z(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
120 int esz
, int rd
, int rn
, int rm
)
122 if (sve_access_check(s
)) {
123 unsigned vsz
= vec_full_reg_size(s
);
124 gvec_fn(esz
, vec_full_reg_offset(s
, rd
),
125 vec_full_reg_offset(s
, rn
),
126 vec_full_reg_offset(s
, rm
), vsz
, vsz
);
131 /* Invoke a vector move on two Zregs. */
132 static bool do_mov_z(DisasContext
*s
, int rd
, int rn
)
134 return do_vector2_z(s
, tcg_gen_gvec_mov
, 0, rd
, rn
);
137 /* Invoke a vector expander on two Pregs. */
138 static bool do_vector2_p(DisasContext
*s
, GVecGen2Fn
*gvec_fn
,
139 int esz
, int rd
, int rn
)
141 if (sve_access_check(s
)) {
142 unsigned psz
= pred_gvec_reg_size(s
);
143 gvec_fn(esz
, pred_full_reg_offset(s
, rd
),
144 pred_full_reg_offset(s
, rn
), psz
, psz
);
149 /* Invoke a vector expander on three Pregs. */
150 static bool do_vector3_p(DisasContext
*s
, GVecGen3Fn
*gvec_fn
,
151 int esz
, int rd
, int rn
, int rm
)
153 if (sve_access_check(s
)) {
154 unsigned psz
= pred_gvec_reg_size(s
);
155 gvec_fn(esz
, pred_full_reg_offset(s
, rd
),
156 pred_full_reg_offset(s
, rn
),
157 pred_full_reg_offset(s
, rm
), psz
, psz
);
162 /* Invoke a vector operation on four Pregs. */
163 static bool do_vecop4_p(DisasContext
*s
, const GVecGen4
*gvec_op
,
164 int rd
, int rn
, int rm
, int rg
)
166 if (sve_access_check(s
)) {
167 unsigned psz
= pred_gvec_reg_size(s
);
168 tcg_gen_gvec_4(pred_full_reg_offset(s
, rd
),
169 pred_full_reg_offset(s
, rn
),
170 pred_full_reg_offset(s
, rm
),
171 pred_full_reg_offset(s
, rg
),
177 /* Invoke a vector move on two Pregs. */
178 static bool do_mov_p(DisasContext
*s
, int rd
, int rn
)
180 return do_vector2_p(s
, tcg_gen_gvec_mov
, 0, rd
, rn
);
183 /* Set the cpu flags as per a return from an SVE helper. */
184 static void do_pred_flags(TCGv_i32 t
)
186 tcg_gen_mov_i32(cpu_NF
, t
);
187 tcg_gen_andi_i32(cpu_ZF
, t
, 2);
188 tcg_gen_andi_i32(cpu_CF
, t
, 1);
189 tcg_gen_movi_i32(cpu_VF
, 0);
192 /* Subroutines computing the ARM PredTest psuedofunction. */
193 static void do_predtest1(TCGv_i64 d
, TCGv_i64 g
)
195 TCGv_i32 t
= tcg_temp_new_i32();
197 gen_helper_sve_predtest1(t
, d
, g
);
199 tcg_temp_free_i32(t
);
202 static void do_predtest(DisasContext
*s
, int dofs
, int gofs
, int words
)
204 TCGv_ptr dptr
= tcg_temp_new_ptr();
205 TCGv_ptr gptr
= tcg_temp_new_ptr();
208 tcg_gen_addi_ptr(dptr
, cpu_env
, dofs
);
209 tcg_gen_addi_ptr(gptr
, cpu_env
, gofs
);
210 t
= tcg_const_i32(words
);
212 gen_helper_sve_predtest(t
, dptr
, gptr
, t
);
213 tcg_temp_free_ptr(dptr
);
214 tcg_temp_free_ptr(gptr
);
217 tcg_temp_free_i32(t
);
220 /* For each element size, the bits within a predicate word that are active. */
221 const uint64_t pred_esz_masks
[4] = {
222 0xffffffffffffffffull
, 0x5555555555555555ull
,
223 0x1111111111111111ull
, 0x0101010101010101ull
227 *** SVE Logical - Unpredicated Group
230 static bool trans_AND_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
232 return do_vector3_z(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->rm
);
235 static bool trans_ORR_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
237 if (a
->rn
== a
->rm
) { /* MOV */
238 return do_mov_z(s
, a
->rd
, a
->rn
);
240 return do_vector3_z(s
, tcg_gen_gvec_or
, 0, a
->rd
, a
->rn
, a
->rm
);
244 static bool trans_EOR_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
246 return do_vector3_z(s
, tcg_gen_gvec_xor
, 0, a
->rd
, a
->rn
, a
->rm
);
249 static bool trans_BIC_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
251 return do_vector3_z(s
, tcg_gen_gvec_andc
, 0, a
->rd
, a
->rn
, a
->rm
);
255 *** SVE Integer Arithmetic - Unpredicated Group
258 static bool trans_ADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
260 return do_vector3_z(s
, tcg_gen_gvec_add
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
263 static bool trans_SUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
265 return do_vector3_z(s
, tcg_gen_gvec_sub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
268 static bool trans_SQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
270 return do_vector3_z(s
, tcg_gen_gvec_ssadd
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
273 static bool trans_SQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
275 return do_vector3_z(s
, tcg_gen_gvec_sssub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
278 static bool trans_UQADD_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
280 return do_vector3_z(s
, tcg_gen_gvec_usadd
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
283 static bool trans_UQSUB_zzz(DisasContext
*s
, arg_rrr_esz
*a
, uint32_t insn
)
285 return do_vector3_z(s
, tcg_gen_gvec_ussub
, a
->esz
, a
->rd
, a
->rn
, a
->rm
);
289 *** SVE Integer Arithmetic - Binary Predicated Group
292 static bool do_zpzz_ool(DisasContext
*s
, arg_rprr_esz
*a
, gen_helper_gvec_4
*fn
)
294 unsigned vsz
= vec_full_reg_size(s
);
298 if (sve_access_check(s
)) {
299 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, a
->rd
),
300 vec_full_reg_offset(s
, a
->rn
),
301 vec_full_reg_offset(s
, a
->rm
),
302 pred_full_reg_offset(s
, a
->pg
),
308 #define DO_ZPZZ(NAME, name) \
309 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
312 static gen_helper_gvec_4 * const fns[4] = { \
313 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
314 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
316 return do_zpzz_ool(s, a, fns[a->esz]); \
335 DO_ZPZZ(SMULH
, smulh
)
336 DO_ZPZZ(UMULH
, umulh
)
342 static bool trans_SDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
344 static gen_helper_gvec_4
* const fns
[4] = {
345 NULL
, NULL
, gen_helper_sve_sdiv_zpzz_s
, gen_helper_sve_sdiv_zpzz_d
347 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
350 static bool trans_UDIV_zpzz(DisasContext
*s
, arg_rprr_esz
*a
, uint32_t insn
)
352 static gen_helper_gvec_4
* const fns
[4] = {
353 NULL
, NULL
, gen_helper_sve_udiv_zpzz_s
, gen_helper_sve_udiv_zpzz_d
355 return do_zpzz_ool(s
, a
, fns
[a
->esz
]);
361 *** SVE Integer Arithmetic - Unary Predicated Group
364 static bool do_zpz_ool(DisasContext
*s
, arg_rpr_esz
*a
, gen_helper_gvec_3
*fn
)
369 if (sve_access_check(s
)) {
370 unsigned vsz
= vec_full_reg_size(s
);
371 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
372 vec_full_reg_offset(s
, a
->rn
),
373 pred_full_reg_offset(s
, a
->pg
),
379 #define DO_ZPZ(NAME, name) \
380 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
382 static gen_helper_gvec_3 * const fns[4] = { \
383 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
384 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
386 return do_zpz_ool(s, a, fns[a->esz]); \
391 DO_ZPZ(CNT_zpz
, cnt_zpz
)
393 DO_ZPZ(NOT_zpz
, not_zpz
)
397 static bool trans_FABS(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
399 static gen_helper_gvec_3
* const fns
[4] = {
401 gen_helper_sve_fabs_h
,
402 gen_helper_sve_fabs_s
,
403 gen_helper_sve_fabs_d
405 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
408 static bool trans_FNEG(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
410 static gen_helper_gvec_3
* const fns
[4] = {
412 gen_helper_sve_fneg_h
,
413 gen_helper_sve_fneg_s
,
414 gen_helper_sve_fneg_d
416 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
419 static bool trans_SXTB(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
421 static gen_helper_gvec_3
* const fns
[4] = {
423 gen_helper_sve_sxtb_h
,
424 gen_helper_sve_sxtb_s
,
425 gen_helper_sve_sxtb_d
427 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
430 static bool trans_UXTB(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
432 static gen_helper_gvec_3
* const fns
[4] = {
434 gen_helper_sve_uxtb_h
,
435 gen_helper_sve_uxtb_s
,
436 gen_helper_sve_uxtb_d
438 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
441 static bool trans_SXTH(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
443 static gen_helper_gvec_3
* const fns
[4] = {
445 gen_helper_sve_sxth_s
,
446 gen_helper_sve_sxth_d
448 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
451 static bool trans_UXTH(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
453 static gen_helper_gvec_3
* const fns
[4] = {
455 gen_helper_sve_uxth_s
,
456 gen_helper_sve_uxth_d
458 return do_zpz_ool(s
, a
, fns
[a
->esz
]);
461 static bool trans_SXTW(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
463 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_sxtw_d
: NULL
);
466 static bool trans_UXTW(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
468 return do_zpz_ool(s
, a
, a
->esz
== 3 ? gen_helper_sve_uxtw_d
: NULL
);
474 *** SVE Integer Reduction Group
477 typedef void gen_helper_gvec_reduc(TCGv_i64
, TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
478 static bool do_vpz_ool(DisasContext
*s
, arg_rpr_esz
*a
,
479 gen_helper_gvec_reduc
*fn
)
481 unsigned vsz
= vec_full_reg_size(s
);
489 if (!sve_access_check(s
)) {
493 desc
= tcg_const_i32(simd_desc(vsz
, vsz
, 0));
494 temp
= tcg_temp_new_i64();
495 t_zn
= tcg_temp_new_ptr();
496 t_pg
= tcg_temp_new_ptr();
498 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, a
->rn
));
499 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
500 fn(temp
, t_zn
, t_pg
, desc
);
501 tcg_temp_free_ptr(t_zn
);
502 tcg_temp_free_ptr(t_pg
);
503 tcg_temp_free_i32(desc
);
505 write_fp_dreg(s
, a
->rd
, temp
);
506 tcg_temp_free_i64(temp
);
/* Expand a trans_* function for an SVE reduction with helpers
 * for all four element sizes.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}
530 static bool trans_SADDV(DisasContext
*s
, arg_rpr_esz
*a
, uint32_t insn
)
532 static gen_helper_gvec_reduc
* const fns
[4] = {
533 gen_helper_sve_saddv_b
, gen_helper_sve_saddv_h
,
534 gen_helper_sve_saddv_s
, NULL
536 return do_vpz_ool(s
, a
, fns
[a
->esz
]);
542 *** SVE Shift by Immediate - Predicated Group
545 /* Store zero into every active element of Zd. We will use this for two
546 * and three-operand predicated instructions for which logic dictates a
549 static bool do_clr_zp(DisasContext
*s
, int rd
, int pg
, int esz
)
551 static gen_helper_gvec_2
* const fns
[4] = {
552 gen_helper_sve_clr_b
, gen_helper_sve_clr_h
,
553 gen_helper_sve_clr_s
, gen_helper_sve_clr_d
,
555 if (sve_access_check(s
)) {
556 unsigned vsz
= vec_full_reg_size(s
);
557 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, rd
),
558 pred_full_reg_offset(s
, pg
),
559 vsz
, vsz
, 0, fns
[esz
]);
564 static bool do_zpzi_ool(DisasContext
*s
, arg_rpri_esz
*a
,
565 gen_helper_gvec_3
*fn
)
567 if (sve_access_check(s
)) {
568 unsigned vsz
= vec_full_reg_size(s
);
569 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, a
->rd
),
570 vec_full_reg_offset(s
, a
->rn
),
571 pred_full_reg_offset(s
, a
->pg
),
572 vsz
, vsz
, a
->imm
, fn
);
577 static bool trans_ASR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
579 static gen_helper_gvec_3
* const fns
[4] = {
580 gen_helper_sve_asr_zpzi_b
, gen_helper_sve_asr_zpzi_h
,
581 gen_helper_sve_asr_zpzi_s
, gen_helper_sve_asr_zpzi_d
,
584 /* Invalid tsz encoding -- see tszimm_esz. */
587 /* Shift by element size is architecturally valid. For
588 arithmetic right-shift, it's the same as by one less. */
589 a
->imm
= MIN(a
->imm
, (8 << a
->esz
) - 1);
590 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
593 static bool trans_LSR_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
595 static gen_helper_gvec_3
* const fns
[4] = {
596 gen_helper_sve_lsr_zpzi_b
, gen_helper_sve_lsr_zpzi_h
,
597 gen_helper_sve_lsr_zpzi_s
, gen_helper_sve_lsr_zpzi_d
,
602 /* Shift by element size is architecturally valid.
603 For logical shifts, it is a zeroing operation. */
604 if (a
->imm
>= (8 << a
->esz
)) {
605 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
607 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
611 static bool trans_LSL_zpzi(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
613 static gen_helper_gvec_3
* const fns
[4] = {
614 gen_helper_sve_lsl_zpzi_b
, gen_helper_sve_lsl_zpzi_h
,
615 gen_helper_sve_lsl_zpzi_s
, gen_helper_sve_lsl_zpzi_d
,
620 /* Shift by element size is architecturally valid.
621 For logical shifts, it is a zeroing operation. */
622 if (a
->imm
>= (8 << a
->esz
)) {
623 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
625 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
629 static bool trans_ASRD(DisasContext
*s
, arg_rpri_esz
*a
, uint32_t insn
)
631 static gen_helper_gvec_3
* const fns
[4] = {
632 gen_helper_sve_asrd_b
, gen_helper_sve_asrd_h
,
633 gen_helper_sve_asrd_s
, gen_helper_sve_asrd_d
,
638 /* Shift by element size is architecturally valid. For arithmetic
639 right shift for division, it is a zeroing operation. */
640 if (a
->imm
>= (8 << a
->esz
)) {
641 return do_clr_zp(s
, a
->rd
, a
->pg
, a
->esz
);
643 return do_zpzi_ool(s
, a
, fns
[a
->esz
]);
648 *** SVE Bitwise Shift - Predicated Group
/* Expand a trans_* function for a predicated shift by a wide (64-bit)
 * shift-count vector; only b/h/s element sizes are allocated.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
672 *** SVE Integer Multiply-Add Group
675 static bool do_zpzzz_ool(DisasContext
*s
, arg_rprrr_esz
*a
,
676 gen_helper_gvec_5
*fn
)
678 if (sve_access_check(s
)) {
679 unsigned vsz
= vec_full_reg_size(s
);
680 tcg_gen_gvec_5_ool(vec_full_reg_offset(s
, a
->rd
),
681 vec_full_reg_offset(s
, a
->ra
),
682 vec_full_reg_offset(s
, a
->rn
),
683 vec_full_reg_offset(s
, a
->rm
),
684 pred_full_reg_offset(s
, a
->pg
),
/* Expand a trans_* function for a predicated multiply-accumulate insn
 * with helpers for all four element sizes.
 */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}
706 *** SVE Predicate Logical Operations Group
709 static bool do_pppp_flags(DisasContext
*s
, arg_rprr_s
*a
,
710 const GVecGen4
*gvec_op
)
712 if (!sve_access_check(s
)) {
716 unsigned psz
= pred_gvec_reg_size(s
);
717 int dofs
= pred_full_reg_offset(s
, a
->rd
);
718 int nofs
= pred_full_reg_offset(s
, a
->rn
);
719 int mofs
= pred_full_reg_offset(s
, a
->rm
);
720 int gofs
= pred_full_reg_offset(s
, a
->pg
);
723 /* Do the operation and the flags generation in temps. */
724 TCGv_i64 pd
= tcg_temp_new_i64();
725 TCGv_i64 pn
= tcg_temp_new_i64();
726 TCGv_i64 pm
= tcg_temp_new_i64();
727 TCGv_i64 pg
= tcg_temp_new_i64();
729 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
730 tcg_gen_ld_i64(pm
, cpu_env
, mofs
);
731 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
733 gvec_op
->fni8(pd
, pn
, pm
, pg
);
734 tcg_gen_st_i64(pd
, cpu_env
, dofs
);
736 do_predtest1(pd
, pg
);
738 tcg_temp_free_i64(pd
);
739 tcg_temp_free_i64(pn
);
740 tcg_temp_free_i64(pm
);
741 tcg_temp_free_i64(pg
);
743 /* The operation and flags generation is large. The computation
744 * of the flags depends on the original contents of the guarding
745 * predicate. If the destination overwrites the guarding predicate,
746 * then the easiest way to get this right is to save a copy.
749 if (a
->rd
== a
->pg
) {
750 tofs
= offsetof(CPUARMState
, vfp
.preg_tmp
);
751 tcg_gen_gvec_mov(0, tofs
, gofs
, psz
, psz
);
754 tcg_gen_gvec_4(dofs
, nofs
, mofs
, gofs
, psz
, psz
, gvec_op
);
755 do_predtest(s
, dofs
, tofs
, psz
/ 8);
760 static void gen_and_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
762 tcg_gen_and_i64(pd
, pn
, pm
);
763 tcg_gen_and_i64(pd
, pd
, pg
);
766 static void gen_and_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
767 TCGv_vec pm
, TCGv_vec pg
)
769 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
770 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
773 static bool trans_AND_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
775 static const GVecGen4 op
= {
776 .fni8
= gen_and_pg_i64
,
777 .fniv
= gen_and_pg_vec
,
778 .fno
= gen_helper_sve_and_pppp
,
779 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
782 return do_pppp_flags(s
, a
, &op
);
783 } else if (a
->rn
== a
->rm
) {
784 if (a
->pg
== a
->rn
) {
785 return do_mov_p(s
, a
->rd
, a
->rn
);
787 return do_vector3_p(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->pg
);
789 } else if (a
->pg
== a
->rn
|| a
->pg
== a
->rm
) {
790 return do_vector3_p(s
, tcg_gen_gvec_and
, 0, a
->rd
, a
->rn
, a
->rm
);
792 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
796 static void gen_bic_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
798 tcg_gen_andc_i64(pd
, pn
, pm
);
799 tcg_gen_and_i64(pd
, pd
, pg
);
802 static void gen_bic_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
803 TCGv_vec pm
, TCGv_vec pg
)
805 tcg_gen_andc_vec(vece
, pd
, pn
, pm
);
806 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
809 static bool trans_BIC_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
811 static const GVecGen4 op
= {
812 .fni8
= gen_bic_pg_i64
,
813 .fniv
= gen_bic_pg_vec
,
814 .fno
= gen_helper_sve_bic_pppp
,
815 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
818 return do_pppp_flags(s
, a
, &op
);
819 } else if (a
->pg
== a
->rn
) {
820 return do_vector3_p(s
, tcg_gen_gvec_andc
, 0, a
->rd
, a
->rn
, a
->rm
);
822 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
826 static void gen_eor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
828 tcg_gen_xor_i64(pd
, pn
, pm
);
829 tcg_gen_and_i64(pd
, pd
, pg
);
832 static void gen_eor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
833 TCGv_vec pm
, TCGv_vec pg
)
835 tcg_gen_xor_vec(vece
, pd
, pn
, pm
);
836 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
839 static bool trans_EOR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
841 static const GVecGen4 op
= {
842 .fni8
= gen_eor_pg_i64
,
843 .fniv
= gen_eor_pg_vec
,
844 .fno
= gen_helper_sve_eor_pppp
,
845 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
848 return do_pppp_flags(s
, a
, &op
);
850 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
854 static void gen_sel_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
856 tcg_gen_and_i64(pn
, pn
, pg
);
857 tcg_gen_andc_i64(pm
, pm
, pg
);
858 tcg_gen_or_i64(pd
, pn
, pm
);
861 static void gen_sel_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
862 TCGv_vec pm
, TCGv_vec pg
)
864 tcg_gen_and_vec(vece
, pn
, pn
, pg
);
865 tcg_gen_andc_vec(vece
, pm
, pm
, pg
);
866 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
869 static bool trans_SEL_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
871 static const GVecGen4 op
= {
872 .fni8
= gen_sel_pg_i64
,
873 .fniv
= gen_sel_pg_vec
,
874 .fno
= gen_helper_sve_sel_pppp
,
875 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
880 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
884 static void gen_orr_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
886 tcg_gen_or_i64(pd
, pn
, pm
);
887 tcg_gen_and_i64(pd
, pd
, pg
);
890 static void gen_orr_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
891 TCGv_vec pm
, TCGv_vec pg
)
893 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
894 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
897 static bool trans_ORR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
899 static const GVecGen4 op
= {
900 .fni8
= gen_orr_pg_i64
,
901 .fniv
= gen_orr_pg_vec
,
902 .fno
= gen_helper_sve_orr_pppp
,
903 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
906 return do_pppp_flags(s
, a
, &op
);
907 } else if (a
->pg
== a
->rn
&& a
->rn
== a
->rm
) {
908 return do_mov_p(s
, a
->rd
, a
->rn
);
910 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
914 static void gen_orn_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
916 tcg_gen_orc_i64(pd
, pn
, pm
);
917 tcg_gen_and_i64(pd
, pd
, pg
);
920 static void gen_orn_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
921 TCGv_vec pm
, TCGv_vec pg
)
923 tcg_gen_orc_vec(vece
, pd
, pn
, pm
);
924 tcg_gen_and_vec(vece
, pd
, pd
, pg
);
927 static bool trans_ORN_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
929 static const GVecGen4 op
= {
930 .fni8
= gen_orn_pg_i64
,
931 .fniv
= gen_orn_pg_vec
,
932 .fno
= gen_helper_sve_orn_pppp
,
933 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
936 return do_pppp_flags(s
, a
, &op
);
938 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
942 static void gen_nor_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
944 tcg_gen_or_i64(pd
, pn
, pm
);
945 tcg_gen_andc_i64(pd
, pg
, pd
);
948 static void gen_nor_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
949 TCGv_vec pm
, TCGv_vec pg
)
951 tcg_gen_or_vec(vece
, pd
, pn
, pm
);
952 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
955 static bool trans_NOR_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
957 static const GVecGen4 op
= {
958 .fni8
= gen_nor_pg_i64
,
959 .fniv
= gen_nor_pg_vec
,
960 .fno
= gen_helper_sve_nor_pppp
,
961 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
964 return do_pppp_flags(s
, a
, &op
);
966 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
970 static void gen_nand_pg_i64(TCGv_i64 pd
, TCGv_i64 pn
, TCGv_i64 pm
, TCGv_i64 pg
)
972 tcg_gen_and_i64(pd
, pn
, pm
);
973 tcg_gen_andc_i64(pd
, pg
, pd
);
976 static void gen_nand_pg_vec(unsigned vece
, TCGv_vec pd
, TCGv_vec pn
,
977 TCGv_vec pm
, TCGv_vec pg
)
979 tcg_gen_and_vec(vece
, pd
, pn
, pm
);
980 tcg_gen_andc_vec(vece
, pd
, pg
, pd
);
983 static bool trans_NAND_pppp(DisasContext
*s
, arg_rprr_s
*a
, uint32_t insn
)
985 static const GVecGen4 op
= {
986 .fni8
= gen_nand_pg_i64
,
987 .fniv
= gen_nand_pg_vec
,
988 .fno
= gen_helper_sve_nand_pppp
,
989 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
992 return do_pppp_flags(s
, a
, &op
);
994 return do_vecop4_p(s
, &op
, a
->rd
, a
->rn
, a
->rm
, a
->pg
);
999 *** SVE Predicate Misc Group
1002 static bool trans_PTEST(DisasContext
*s
, arg_PTEST
*a
, uint32_t insn
)
1004 if (sve_access_check(s
)) {
1005 int nofs
= pred_full_reg_offset(s
, a
->rn
);
1006 int gofs
= pred_full_reg_offset(s
, a
->pg
);
1007 int words
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1010 TCGv_i64 pn
= tcg_temp_new_i64();
1011 TCGv_i64 pg
= tcg_temp_new_i64();
1013 tcg_gen_ld_i64(pn
, cpu_env
, nofs
);
1014 tcg_gen_ld_i64(pg
, cpu_env
, gofs
);
1015 do_predtest1(pn
, pg
);
1017 tcg_temp_free_i64(pn
);
1018 tcg_temp_free_i64(pg
);
1020 do_predtest(s
, nofs
, gofs
, words
);
/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
1064 /* This handles all of the predicate initialization instructions,
1065 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1066 * so that decode_pred_count returns 0. For SETFFR, we will have
1067 * set RD == 16 == FFR.
1069 static bool do_predset(DisasContext
*s
, int esz
, int rd
, int pat
, bool setflag
)
1071 if (!sve_access_check(s
)) {
1075 unsigned fullsz
= vec_full_reg_size(s
);
1076 unsigned ofs
= pred_full_reg_offset(s
, rd
);
1077 unsigned numelem
, setsz
, i
;
1078 uint64_t word
, lastword
;
1081 numelem
= decode_pred_count(fullsz
, pat
, esz
);
1083 /* Determine what we must store into each bit, and how many. */
1085 lastword
= word
= 0;
1088 setsz
= numelem
<< esz
;
1089 lastword
= word
= pred_esz_masks
[esz
];
1091 lastword
&= ~(-1ull << (setsz
% 64));
1095 t
= tcg_temp_new_i64();
1097 tcg_gen_movi_i64(t
, lastword
);
1098 tcg_gen_st_i64(t
, cpu_env
, ofs
);
1102 if (word
== lastword
) {
1103 unsigned maxsz
= size_for_gvec(fullsz
/ 8);
1104 unsigned oprsz
= size_for_gvec(setsz
/ 8);
1106 if (oprsz
* 8 == setsz
) {
1107 tcg_gen_gvec_dup64i(ofs
, oprsz
, maxsz
, word
);
1110 if (oprsz
* 8 == setsz
+ 8) {
1111 tcg_gen_gvec_dup64i(ofs
, oprsz
, maxsz
, word
);
1112 tcg_gen_movi_i64(t
, 0);
1113 tcg_gen_st_i64(t
, cpu_env
, ofs
+ oprsz
- 8);
1121 tcg_gen_movi_i64(t
, word
);
1122 for (i
= 0; i
< setsz
; i
+= 8) {
1123 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1125 if (lastword
!= word
) {
1126 tcg_gen_movi_i64(t
, lastword
);
1127 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1131 tcg_gen_movi_i64(t
, 0);
1132 for (; i
< fullsz
; i
+= 8) {
1133 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1138 tcg_temp_free_i64(t
);
1142 tcg_gen_movi_i32(cpu_NF
, -(word
!= 0));
1143 tcg_gen_movi_i32(cpu_CF
, word
== 0);
1144 tcg_gen_movi_i32(cpu_VF
, 0);
1145 tcg_gen_mov_i32(cpu_ZF
, cpu_NF
);
1150 static bool trans_PTRUE(DisasContext
*s
, arg_PTRUE
*a
, uint32_t insn
)
1152 return do_predset(s
, a
->esz
, a
->rd
, a
->pat
, a
->s
);
1155 static bool trans_SETFFR(DisasContext
*s
, arg_SETFFR
*a
, uint32_t insn
)
1157 /* Note pat == 31 is #all, to set all elements. */
1158 return do_predset(s
, 0, FFR_PRED_NUM
, 31, false);
1161 static bool trans_PFALSE(DisasContext
*s
, arg_PFALSE
*a
, uint32_t insn
)
1163 /* Note pat == 32 is #unimp, to set no elements. */
1164 return do_predset(s
, 0, a
->rd
, 32, false);
1167 static bool trans_RDFFR_p(DisasContext
*s
, arg_RDFFR_p
*a
, uint32_t insn
)
1169 /* The path through do_pppp_flags is complicated enough to want to avoid
1170 * duplication. Frob the arguments into the form of a predicated AND.
1172 arg_rprr_s alt_a
= {
1173 .rd
= a
->rd
, .pg
= a
->pg
, .s
= a
->s
,
1174 .rn
= FFR_PRED_NUM
, .rm
= FFR_PRED_NUM
,
1176 return trans_AND_pppp(s
, &alt_a
, insn
);
1179 static bool trans_RDFFR(DisasContext
*s
, arg_RDFFR
*a
, uint32_t insn
)
1181 return do_mov_p(s
, a
->rd
, FFR_PRED_NUM
);
1184 static bool trans_WRFFR(DisasContext
*s
, arg_WRFFR
*a
, uint32_t insn
)
1186 return do_mov_p(s
, FFR_PRED_NUM
, a
->rn
);
1189 static bool do_pfirst_pnext(DisasContext
*s
, arg_rr_esz
*a
,
1190 void (*gen_fn
)(TCGv_i32
, TCGv_ptr
,
1191 TCGv_ptr
, TCGv_i32
))
1193 if (!sve_access_check(s
)) {
1197 TCGv_ptr t_pd
= tcg_temp_new_ptr();
1198 TCGv_ptr t_pg
= tcg_temp_new_ptr();
1202 desc
= DIV_ROUND_UP(pred_full_reg_size(s
), 8);
1203 desc
= deposit32(desc
, SIMD_DATA_SHIFT
, 2, a
->esz
);
1205 tcg_gen_addi_ptr(t_pd
, cpu_env
, pred_full_reg_offset(s
, a
->rd
));
1206 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->rn
));
1207 t
= tcg_const_i32(desc
);
1209 gen_fn(t
, t_pd
, t_pg
, t
);
1210 tcg_temp_free_ptr(t_pd
);
1211 tcg_temp_free_ptr(t_pg
);
1214 tcg_temp_free_i32(t
);
1218 static bool trans_PFIRST(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
1220 return do_pfirst_pnext(s
, a
, gen_helper_sve_pfirst
);
1223 static bool trans_PNEXT(DisasContext
*s
, arg_rr_esz
*a
, uint32_t insn
)
1225 return do_pfirst_pnext(s
, a
, gen_helper_sve_pnext
);
1229 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
1232 /* Subroutine loading a vector register at VOFS of LEN bytes.
1233 * The load should begin at the address Rn + IMM.
1236 static void do_ldr(DisasContext
*s
, uint32_t vofs
, uint32_t len
,
1239 uint32_t len_align
= QEMU_ALIGN_DOWN(len
, 8);
1240 uint32_t len_remain
= len
% 8;
1241 uint32_t nparts
= len
/ 8 + ctpop8(len_remain
);
1242 int midx
= get_mem_index(s
);
1243 TCGv_i64 addr
, t0
, t1
;
1245 addr
= tcg_temp_new_i64();
1246 t0
= tcg_temp_new_i64();
1248 /* Note that unpredicated load/store of vector/predicate registers
1249 * are defined as a stream of bytes, which equates to little-endian
1250 * operations on larger quantities. There is no nice way to force
1251 * a little-endian load for aarch64_be-linux-user out of line.
1253 * Attempt to keep code expansion to a minimum by limiting the
1254 * amount of unrolling done.
1259 for (i
= 0; i
< len_align
; i
+= 8) {
1260 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ i
);
1261 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEQ
);
1262 tcg_gen_st_i64(t0
, cpu_env
, vofs
+ i
);
1265 TCGLabel
*loop
= gen_new_label();
1266 TCGv_ptr tp
, i
= tcg_const_local_ptr(0);
1268 gen_set_label(loop
);
1270 /* Minimize the number of local temps that must be re-read from
1271 * the stack each iteration. Instead, re-compute values other
1272 * than the loop counter.
1274 tp
= tcg_temp_new_ptr();
1275 tcg_gen_addi_ptr(tp
, i
, imm
);
1276 tcg_gen_extu_ptr_i64(addr
, tp
);
1277 tcg_gen_add_i64(addr
, addr
, cpu_reg_sp(s
, rn
));
1279 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEQ
);
1281 tcg_gen_add_ptr(tp
, cpu_env
, i
);
1282 tcg_gen_addi_ptr(i
, i
, 8);
1283 tcg_gen_st_i64(t0
, tp
, vofs
);
1284 tcg_temp_free_ptr(tp
);
1286 tcg_gen_brcondi_ptr(TCG_COND_LTU
, i
, len_align
, loop
);
1287 tcg_temp_free_ptr(i
);
1290 /* Predicate register loads can be any multiple of 2.
1291 * Note that we still store the entire 64-bit unit into cpu_env.
1294 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
+ len_align
);
1296 switch (len_remain
) {
1300 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LE
| ctz32(len_remain
));
1304 t1
= tcg_temp_new_i64();
1305 tcg_gen_qemu_ld_i64(t0
, addr
, midx
, MO_LEUL
);
1306 tcg_gen_addi_i64(addr
, addr
, 4);
1307 tcg_gen_qemu_ld_i64(t1
, addr
, midx
, MO_LEUW
);
1308 tcg_gen_deposit_i64(t0
, t0
, t1
, 32, 32);
1309 tcg_temp_free_i64(t1
);
1313 g_assert_not_reached();
1315 tcg_gen_st_i64(t0
, cpu_env
, vofs
+ len_align
);
1317 tcg_temp_free_i64(addr
);
1318 tcg_temp_free_i64(t0
);
1321 static bool trans_LDR_zri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
1323 if (sve_access_check(s
)) {
1324 int size
= vec_full_reg_size(s
);
1325 int off
= vec_full_reg_offset(s
, a
->rd
);
1326 do_ldr(s
, off
, size
, a
->rn
, a
->imm
* size
);
1331 static bool trans_LDR_pri(DisasContext
*s
, arg_rri
*a
, uint32_t insn
)
1333 if (sve_access_check(s
)) {
1334 int size
= pred_full_reg_size(s
);
1335 int off
= pred_full_reg_offset(s
, a
->rd
);
1336 do_ldr(s
, off
, size
, a
->rn
, a
->imm
* size
);