/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
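/* Worked example: for the 6-bit tsz:imm3 field x = 0b100000,
 * tszimm_esz gives 31 - clz32(0b100) = 2 (32-bit elements) and
 * tszimm_shr gives (16 << 2) - 32 = 32, i.e. a right shift by the
 * element size; any x < 8 has tsz == 0 and decodes to esz = -1.
 */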

static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}
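/* E.g. x = 0x1ab has SH set, giving (int8_t)0xab << 8 = -0x5500;
 * with SH clear, x = 0xab gives just (int8_t)0xab = -0x55.
 */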

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
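/* E.g. for a 256-bit vector length, sve_len is 32 bytes and the
 * predicate register is 4 bytes: one predicate bit per vector byte.
 */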

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
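/* E.g. size_for_gvec(4) == 8, and size_for_gvec(10) == 16. */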

/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
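/* The sve_predtest helpers pack the result as: bit 31 = N (the first
 * active element was true), bit 1 set = some active element was true
 * (so Z ends up clear, since cpu_ZF encodes Z as "value == 0"), and
 * bit 0 = C (the last active element was false).
 */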

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
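/* E.g. pred_esz_masks[2] keeps the low bit of each nibble: one
 * significant predicate bit per 32-bit element.
 */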

/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
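/* For example, DO_ZPZZ(ADD, add) defines trans_ADD_zpzz, dispatching on
 * a->esz to gen_helper_sve_add_zpzz_{b,h,s,d}.
 */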

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

DO_ZPZZ(SEL, sel)

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_reduc * const fns[4] = {                 \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_vpz_ool(s, a, fns[a->esz]);                           \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
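/* INDEX fills each element i of Zd with start + i * incr; e.g.
 * INDEX Z0.S, #0, #4 yields 0, 4, 8, ... across the vector.
 */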

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
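/* E.g. fullsz = 32 bytes (a 256-bit vector) with esz = 2 gives 8
 * elements: ALL returns 8, POW2 returns 8, MUL3 returns 6, and VL16
 * returns 0 because the bound exceeds the element count.
 */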

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
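/* Worked example: PTRUE P0.S, VL3 with a 64-byte vector gives
 * numelem = 3, so setsz = 12 predicate bits and lastword = 0x111:
 * three 32-bit elements set, the remaining predicate bits zeroed.
 */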

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
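/* E.g. with u set and d clear, reg = 0xfffffff0 and val = 0x20 sum to
 * 0x1_0000_0010, which exceeds UINT32_MAX, so the movcond clamps the
 * result to 0xffffffff.
 */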

/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
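/* The xor sequences above compute signed overflow without a wider type:
 * for addition, overflow occurred iff the operands had the same sign and
 * the result's sign differs, i.e. ((result ^ val) & ~(reg ^ val)) < 0;
 * for subtraction, iff the signs differed and the result's sign differs
 * from reg, i.e. ((reg ^ val) & (reg ^ result)) < 0.
 */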

/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
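/* E.g. with vsz = 16 and imm = 3, the result is bytes Zn[3..15]
 * followed by bytes Zm[0..2]; an immediate >= vsz is treated as a
 * zero offset, leaving Zd = Zn.
 */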

/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}
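/* The element size is encoded unary in the low bits of imm: e.g.
 * imm = 0b10100 gives esz = ctz32(imm) = 2 (32-bit elements) and
 * index = imm >> 3 = 2, i.e. broadcast element 2 of Zn; an index
 * beyond the vector length zeroes the destination instead.
 */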
2006
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

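    /* The helpers are indexed by the destination (wider) element size;
     * there is no narrower type to unpack into bytes, so esz == 0 is
     * unallocated.  The h flag selects the high half of the source.
     */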
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}

/*
 *** SVE Permute - Predicates Group
 */

static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the same style
       as the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

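    /* Pack the exact predicate size minus 2 into the oprsz field, and
     * the element size and high/odd selector into the data field.
     */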
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}

static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the same style
       as the other helpers for consistency.  */

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}

static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}

/*
 *** SVE Permute - Interleaving Group
 */

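/* ZIP2 interleaves the high halves of the two inputs, which we get
 * by offsetting both source addresses by half the vector size.
 */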
static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, true);
}

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

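/* For UZP and TRN the data argument is the byte offset of the first
 * element to select: 0 for the even elements, one element size for
 * the odd elements.
 */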
static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}

/*
 *** SVE Permute Vector - Predicated Group
 */

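/* COMPACT is defined only for word and doubleword element sizes,
 * so the byte and halfword entries are NULL and rejected.
 */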
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}

/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}

/* Load an unsigned element of ESZ from BASE+OFS. */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}

/* Load an unsigned element of ESZ from RM[LAST]. */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert the offset within the vector into an offset within ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
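    /* For example, with esz == 0 on a big-endian host, byte element I
     * of each 64-bit unit lives at host offset (I ^ 7); XOR-ing the
     * index with 8 - (1 << esz) applies that swizzle.
     */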
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}

/* Compute CLAST for a Zreg. */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, true);
}

/* Compute CLAST for a scalar. */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}

/* Compute CLAST for a Vreg. */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, true);
}

/* Compute CLAST for a Xreg. */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

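    /* The X register result is a zero-extended element.  Extend the
     * original value up front, so that the no-active-element case,
     * which keeps REG, is already in the correct form.
     */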
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, true);
}

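/* LASTB returns the last active element itself; LASTA the element
 * after it.  When no element is active, the index wraps: backward to
 * the last element for LASTB, forward to element 0 for LASTA.
 */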
/* Compute LAST for a scalar. */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}

/* Compute LAST for a Vreg. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, true);
}

/* Compute LAST for a Xreg. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, true);
}

static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

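/* The REV{B,H,W} helpers exist only where the unit being swapped is
 * narrower than the element; the remaining entries are NULL, which
 * do_zpz_ool rejects as an unallocated encoding.
 */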
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}

static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

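/* SPLICE copies the active segment of Zn and then fills the rest of
 * the result from the low elements of Zm; the element size travels
 * to the helper in the desc data field.
 */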
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->esz, gen_helper_sve_splice);
    }
    return true;
}

/*
 *** SVE Integer Compare - Vectors Group
 */

static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

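    /* T serves double duty: it carries the simd descriptor in and
     * receives the NZCV flags for the predicate result back out.
     */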
    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

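/* The wide forms compare each element of Zn against the corresponding
 * 64-bit element of Zm, so the doubleword element size is unallocated
 * and the final table entry is NULL.
 */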
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW

/*
 *** SVE Integer Compare - Immediate Groups
 */

static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

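    /* The comparison immediate is passed to the helper in the simd
     * descriptor's data field.
     */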
    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI

/*
 *** SVE Partition Break Group
 */

static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

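    /* The flag-setting variants return NZCV in T, which is then
     * committed with do_pred_flags.
     */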
    if (a->s) {
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}

static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}

/*
 *** SVE Predicate Count Group
 */

static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

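    /* With a vector length of no more than 512 bits, the predicate
     * fits in a single 64-bit word, so count the bits inline.
     */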
    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc;
        TCGv_i32 t_desc;

        desc = psz - 2;
        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}

static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}

static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
                            uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}

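/* The vector forms of INCP/DECP and SINCP/SDECP have no byte variant,
 * hence the esz == 0 rejections below.
 */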
static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
    }
    return true;
}

static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
                                uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}

static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
                                uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}

static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
                             uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
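    /* The remainder (2, 4 or 6 bytes, for predicates) costs one load
     * per set bit, which is what ctpop8 counts.
     */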
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}

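/* The immediate offset of LDR (vector/predicate) is scaled by the
 * register size, i.e. the effective address is Rn + imm * reg_size.
 */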
static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}