target/arm: Fix sve_punpk_p vs odd vector lengths
target/arm/translate-sve.c (mirror_qemu.git)
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
33#include "trace-tcg.h"
34#include "translate-a64.h"
35#include "fpu/softfloat.h"
36
37
38typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64, uint32_t, uint32_t);
40
41typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42 TCGv_ptr, TCGv_i32);
43typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44 TCGv_ptr, TCGv_ptr, TCGv_i32);
45
46typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
47typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48 TCGv_ptr, TCGv_i64, TCGv_i32);
49
50/*
51 * Helpers for extracting complex instruction fields.
52 */
53
54/* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
56 */
57static int tszimm_esz(DisasContext *s, int x)
58{
59 x >>= 3; /* discard imm3 */
60 return 31 - clz32(x);
61}
62
63static int tszimm_shr(DisasContext *s, int x)
64{
65 return (16 << tszimm_esz(s, x)) - x;
66}
67
68/* See e.g. LSL (immediate, predicated). */
69static int tszimm_shl(DisasContext *s, int x)
70{
71 return x - (8 << tszimm_esz(s, x));
72}
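
/*
 * Illustrative example: for ASR (immediate, predicated) of .H elements
 * by 3, the tsz:imm3 field is x = 29 (0b0011101).  tszimm_esz gives
 * 31 - clz32(29 >> 3) = 1 (MO_16), and tszimm_shr gives (16 << 1) - 29 = 3.
 * For LSL .H #3, x = 19 and tszimm_shl gives 19 - (8 << 1) = 3.
 */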
73
74static inline int plus1(DisasContext *s, int x)
75{
76 return x + 1;
77}
78
79/* The SH bit is in bit 8. Extract the low 8 and shift. */
80static inline int expand_imm_sh8s(DisasContext *s, int x)
81{
82 return (int8_t)x << (x & 0x100 ? 8 : 0);
83}
84
85static inline int expand_imm_sh8u(DisasContext *s, int x)
86{
87 return (uint8_t)x << (x & 0x100 ? 8 : 0);
88}
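
/*
 * Illustrative example: with x = 0x1ab the SH bit is set, so
 * expand_imm_sh8u returns 0xab << 8 = 0xab00, while expand_imm_sh8s
 * sign-extends the low byte first: (int8_t)0xab = -85, giving -21760.
 */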
89
90/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
91 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
92 */
93static inline int msz_dtype(DisasContext *s, int msz)
94{
95 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
96 return dtype[msz];
97}
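
/*
 * Illustrative note: these values follow from dtype being the 4-bit
 * msz:esz field, so an unsigned same-size load has dtype
 * (msz << 2) | msz, i.e. 0, 5, 10, 15 for B, H, S and D accesses.
 */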
98
99/*
100 * Include the generated decoder.
101 */
102
103#include "decode-sve.c.inc"
104
105/*
106 * Implement all of the translator functions referenced by the decoder.
107 */
108
109/* Return the offset into CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
111 */
112static inline int pred_full_reg_offset(DisasContext *s, int regno)
113{
114 return offsetof(CPUARMState, vfp.pregs[regno]);
115}
116
117/* Return the byte size of the whole predicate register, VL / 64. */
118static inline int pred_full_reg_size(DisasContext *s)
119{
120 return s->sve_len >> 3;
121}
122
123/* Round up the size of a register to a size allowed by
124 * the tcg vector infrastructure. Any operation which uses this
125 * size may assume that the bits above pred_full_reg_size are zero,
126 * and must leave them the same way.
127 *
128 * Note that this is not needed for the vector registers as they
129 * are always properly sized for tcg vectors.
130 */
131static int size_for_gvec(int size)
132{
133 if (size <= 8) {
134 return 8;
135 } else {
136 return QEMU_ALIGN_UP(size, 16);
137 }
138}
139
140static int pred_gvec_reg_size(DisasContext *s)
141{
142 return size_for_gvec(pred_full_reg_size(s));
143}
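
/*
 * Illustrative example: with a 384-bit vector length the predicate is
 * 48 / 8 = 6 bytes, rounded up to the 8-byte minimum; with a 768-bit
 * vector it is 12 bytes, rounded up to 16 for the tcg vector ops.
 */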
144
145/* Invoke an out-of-line helper on 2 Zregs. */
146static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
147 int rd, int rn, int data)
148{
149 unsigned vsz = vec_full_reg_size(s);
150 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
151 vec_full_reg_offset(s, rn),
152 vsz, vsz, data, fn);
153}
154
155/* Invoke an out-of-line helper on 3 Zregs. */
156static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
157 int rd, int rn, int rm, int data)
158{
159 unsigned vsz = vec_full_reg_size(s);
160 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
161 vec_full_reg_offset(s, rn),
162 vec_full_reg_offset(s, rm),
163 vsz, vsz, data, fn);
164}
165
166/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
167static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
168 int rd, int rn, int pg, int data)
169{
170 unsigned vsz = vec_full_reg_size(s);
171 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
172 vec_full_reg_offset(s, rn),
173 pred_full_reg_offset(s, pg),
174 vsz, vsz, data, fn);
175}
176
177/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
178static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
179 int rd, int rn, int rm, int pg, int data)
180{
181 unsigned vsz = vec_full_reg_size(s);
182 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
183 vec_full_reg_offset(s, rn),
184 vec_full_reg_offset(s, rm),
185 pred_full_reg_offset(s, pg),
186 vsz, vsz, data, fn);
187}
188
189/* Invoke a vector expander on two Zregs. */
190static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
191 int esz, int rd, int rn)
192{
193 unsigned vsz = vec_full_reg_size(s);
194 gvec_fn(esz, vec_full_reg_offset(s, rd),
195 vec_full_reg_offset(s, rn), vsz, vsz);
196}
197
198/* Invoke a vector expander on three Zregs. */
199static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
200 int esz, int rd, int rn, int rm)
201{
202 unsigned vsz = vec_full_reg_size(s);
203 gvec_fn(esz, vec_full_reg_offset(s, rd),
204 vec_full_reg_offset(s, rn),
205 vec_full_reg_offset(s, rm), vsz, vsz);
206}
207
208/* Invoke a vector move on two Zregs. */
209static bool do_mov_z(DisasContext *s, int rd, int rn)
210{
211 if (sve_access_check(s)) {
212 gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
213 }
214 return true;
215}
216
217/* Initialize a Zreg with replications of a 64-bit immediate. */
218static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
219{
220 unsigned vsz = vec_full_reg_size(s);
221 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
222}
223
224/* Invoke a vector expander on three Pregs. */
225static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
226 int rd, int rn, int rm)
227{
228 unsigned psz = pred_gvec_reg_size(s);
229 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
230 pred_full_reg_offset(s, rn),
231 pred_full_reg_offset(s, rm), psz, psz);
232}
233
234/* Invoke a vector move on two Pregs. */
235static bool do_mov_p(DisasContext *s, int rd, int rn)
236{
237 if (sve_access_check(s)) {
238 unsigned psz = pred_gvec_reg_size(s);
239 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
240 pred_full_reg_offset(s, rn), psz, psz);
241 }
242 return true;
243}
244
245/* Set the cpu flags as per a return from an SVE helper. */
246static void do_pred_flags(TCGv_i32 t)
247{
248 tcg_gen_mov_i32(cpu_NF, t);
249 tcg_gen_andi_i32(cpu_ZF, t, 2);
250 tcg_gen_andi_i32(cpu_CF, t, 1);
251 tcg_gen_movi_i32(cpu_VF, 0);
252}
253
254/* Subroutines computing the ARM PredTest pseudofunction. */
255static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
256{
257 TCGv_i32 t = tcg_temp_new_i32();
258
259 gen_helper_sve_predtest1(t, d, g);
260 do_pred_flags(t);
261 tcg_temp_free_i32(t);
262}
263
264static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
265{
266 TCGv_ptr dptr = tcg_temp_new_ptr();
267 TCGv_ptr gptr = tcg_temp_new_ptr();
268 TCGv_i32 t;
269
270 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
271 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
272 t = tcg_const_i32(words);
273
274 gen_helper_sve_predtest(t, dptr, gptr, t);
275 tcg_temp_free_ptr(dptr);
276 tcg_temp_free_ptr(gptr);
277
278 do_pred_flags(t);
279 tcg_temp_free_i32(t);
280}
281
282/* For each element size, the bits within a predicate word that are active. */
283const uint64_t pred_esz_masks[4] = {
284 0xffffffffffffffffull, 0x5555555555555555ull,
285 0x1111111111111111ull, 0x0101010101010101ull
286};
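
/*
 * Illustrative note: one predicate bit governs each byte of the vector,
 * so only every (1 << esz)-th bit is significant: all bits for MO_8,
 * every 2nd for MO_16 (0x5555...), every 4th for MO_32 (0x1111...) and
 * every 8th for MO_64 (0x0101...).
 */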
287
288/*
289 *** SVE Logical - Unpredicated Group
290 */
291
292static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
293{
294 if (sve_access_check(s)) {
295 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
296 }
297 return true;
298}
299
300static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
301{
302 return do_zzz_fn(s, a, tcg_gen_gvec_and);
303}
304
305static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
306{
307 return do_zzz_fn(s, a, tcg_gen_gvec_or);
308}
309
310static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
311{
312 return do_zzz_fn(s, a, tcg_gen_gvec_xor);
313}
314
315static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
316{
317 return do_zzz_fn(s, a, tcg_gen_gvec_andc);
318}
319
320/*
321 *** SVE Integer Arithmetic - Unpredicated Group
322 */
323
324static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
325{
326 return do_zzz_fn(s, a, tcg_gen_gvec_add);
327}
328
329static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
330{
331 return do_zzz_fn(s, a, tcg_gen_gvec_sub);
332}
333
334static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
335{
336 return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
337}
338
339static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
340{
341 return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
342}
343
344static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
345{
346 return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
347}
348
349static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
350{
351 return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
352}
353
354/*
355 *** SVE Integer Arithmetic - Binary Predicated Group
356 */
357
358static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
359{
360 if (fn == NULL) {
361 return false;
362 }
363 if (sve_access_check(s)) {
364 gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
365 }
366 return true;
367}
368
369/* Select active elements from Zn and inactive elements from Zm,
370 * storing the result in Zd.
371 */
372static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
373{
374 static gen_helper_gvec_4 * const fns[4] = {
375 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
376 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
377 };
378 gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
379}
380
381#define DO_ZPZZ(NAME, name) \
382static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
383{ \
384 static gen_helper_gvec_4 * const fns[4] = { \
385 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
386 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
387 }; \
388 return do_zpzz_ool(s, a, fns[a->esz]); \
389}
390
391DO_ZPZZ(AND, and)
392DO_ZPZZ(EOR, eor)
393DO_ZPZZ(ORR, orr)
394DO_ZPZZ(BIC, bic)
395
396DO_ZPZZ(ADD, add)
397DO_ZPZZ(SUB, sub)
398
399DO_ZPZZ(SMAX, smax)
400DO_ZPZZ(UMAX, umax)
401DO_ZPZZ(SMIN, smin)
402DO_ZPZZ(UMIN, umin)
403DO_ZPZZ(SABD, sabd)
404DO_ZPZZ(UABD, uabd)
405
406DO_ZPZZ(MUL, mul)
407DO_ZPZZ(SMULH, smulh)
408DO_ZPZZ(UMULH, umulh)
409
410DO_ZPZZ(ASR, asr)
411DO_ZPZZ(LSR, lsr)
412DO_ZPZZ(LSL, lsl)
413
414static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
415{
416 static gen_helper_gvec_4 * const fns[4] = {
417 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
418 };
419 return do_zpzz_ool(s, a, fns[a->esz]);
420}
421
422static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
423{
424 static gen_helper_gvec_4 * const fns[4] = {
425 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
426 };
427 return do_zpzz_ool(s, a, fns[a->esz]);
428}
429
430static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
431{
432 if (sve_access_check(s)) {
433 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
434 }
435 return true;
436}
437
438#undef DO_ZPZZ
439
440/*
441 *** SVE Integer Arithmetic - Unary Predicated Group
442 */
443
444static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
445{
446 if (fn == NULL) {
447 return false;
448 }
449 if (sve_access_check(s)) {
450 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
451 }
452 return true;
453}
454
455#define DO_ZPZ(NAME, name) \
456static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
457{ \
458 static gen_helper_gvec_3 * const fns[4] = { \
459 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
460 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
461 }; \
462 return do_zpz_ool(s, a, fns[a->esz]); \
463}
464
465DO_ZPZ(CLS, cls)
466DO_ZPZ(CLZ, clz)
467DO_ZPZ(CNT_zpz, cnt_zpz)
468DO_ZPZ(CNOT, cnot)
469DO_ZPZ(NOT_zpz, not_zpz)
470DO_ZPZ(ABS, abs)
471DO_ZPZ(NEG, neg)
472
473static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
474{
475 static gen_helper_gvec_3 * const fns[4] = {
476 NULL,
477 gen_helper_sve_fabs_h,
478 gen_helper_sve_fabs_s,
479 gen_helper_sve_fabs_d
480 };
481 return do_zpz_ool(s, a, fns[a->esz]);
482}
483
484static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
485{
486 static gen_helper_gvec_3 * const fns[4] = {
487 NULL,
488 gen_helper_sve_fneg_h,
489 gen_helper_sve_fneg_s,
490 gen_helper_sve_fneg_d
491 };
492 return do_zpz_ool(s, a, fns[a->esz]);
493}
494
495static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
496{
497 static gen_helper_gvec_3 * const fns[4] = {
498 NULL,
499 gen_helper_sve_sxtb_h,
500 gen_helper_sve_sxtb_s,
501 gen_helper_sve_sxtb_d
502 };
503 return do_zpz_ool(s, a, fns[a->esz]);
504}
505
506static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
507{
508 static gen_helper_gvec_3 * const fns[4] = {
509 NULL,
510 gen_helper_sve_uxtb_h,
511 gen_helper_sve_uxtb_s,
512 gen_helper_sve_uxtb_d
513 };
514 return do_zpz_ool(s, a, fns[a->esz]);
515}
516
517static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
518{
519 static gen_helper_gvec_3 * const fns[4] = {
520 NULL, NULL,
521 gen_helper_sve_sxth_s,
522 gen_helper_sve_sxth_d
523 };
524 return do_zpz_ool(s, a, fns[a->esz]);
525}
526
527static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
528{
529 static gen_helper_gvec_3 * const fns[4] = {
530 NULL, NULL,
531 gen_helper_sve_uxth_s,
532 gen_helper_sve_uxth_d
533 };
534 return do_zpz_ool(s, a, fns[a->esz]);
535}
536
537static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
538{
539 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
540}
541
542static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
543{
544 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
545}
546
547#undef DO_ZPZ
548
549/*
550 *** SVE Integer Reduction Group
551 */
552
553typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
554static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
555 gen_helper_gvec_reduc *fn)
556{
557 unsigned vsz = vec_full_reg_size(s);
558 TCGv_ptr t_zn, t_pg;
559 TCGv_i32 desc;
560 TCGv_i64 temp;
561
562 if (fn == NULL) {
563 return false;
564 }
565 if (!sve_access_check(s)) {
566 return true;
567 }
568
569 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
570 temp = tcg_temp_new_i64();
571 t_zn = tcg_temp_new_ptr();
572 t_pg = tcg_temp_new_ptr();
573
574 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
575 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
576 fn(temp, t_zn, t_pg, desc);
577 tcg_temp_free_ptr(t_zn);
578 tcg_temp_free_ptr(t_pg);
579 tcg_temp_free_i32(desc);
580
581 write_fp_dreg(s, a->rd, temp);
582 tcg_temp_free_i64(temp);
583 return true;
584}
585
586#define DO_VPZ(NAME, name) \
587static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
588{ \
589 static gen_helper_gvec_reduc * const fns[4] = { \
590 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
591 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
592 }; \
593 return do_vpz_ool(s, a, fns[a->esz]); \
594}
595
596DO_VPZ(ORV, orv)
597DO_VPZ(ANDV, andv)
598DO_VPZ(EORV, eorv)
599
600DO_VPZ(UADDV, uaddv)
601DO_VPZ(SMAXV, smaxv)
602DO_VPZ(UMAXV, umaxv)
603DO_VPZ(SMINV, sminv)
604DO_VPZ(UMINV, uminv)
605
606static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
607{
608 static gen_helper_gvec_reduc * const fns[4] = {
609 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
610 gen_helper_sve_saddv_s, NULL
611 };
612 return do_vpz_ool(s, a, fns[a->esz]);
613}
614
615#undef DO_VPZ
616
617/*
618 *** SVE Shift by Immediate - Predicated Group
619 */
620
621/*
622 * Copy Zn into Zd, storing zeros into inactive elements.
623 * If invert, store zeros into the active elements.
624 */
625static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
626 int esz, bool invert)
627{
628 static gen_helper_gvec_3 * const fns[4] = {
629 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
630 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
631 };
632
633 if (sve_access_check(s)) {
634 gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
635 }
636 return true;
637}
638
639static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
640 gen_helper_gvec_3 *fn)
641{
642 if (sve_access_check(s)) {
643 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
644 }
645 return true;
646}
647
648static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
649{
650 static gen_helper_gvec_3 * const fns[4] = {
651 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
652 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
653 };
654 if (a->esz < 0) {
655 /* Invalid tsz encoding -- see tszimm_esz. */
656 return false;
657 }
658 /* Shift by element size is architecturally valid. For
659 arithmetic right-shift, it's the same as by one less. */
660 a->imm = MIN(a->imm, (8 << a->esz) - 1);
661 return do_zpzi_ool(s, a, fns[a->esz]);
662}
663
664static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
665{
666 static gen_helper_gvec_3 * const fns[4] = {
667 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
668 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
669 };
670 if (a->esz < 0) {
671 return false;
672 }
673 /* Shift by element size is architecturally valid.
674 For logical shifts, it is a zeroing operation. */
675 if (a->imm >= (8 << a->esz)) {
676 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
677 } else {
678 return do_zpzi_ool(s, a, fns[a->esz]);
679 }
680}
681
682static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
683{
684 static gen_helper_gvec_3 * const fns[4] = {
685 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
686 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
687 };
688 if (a->esz < 0) {
689 return false;
690 }
691 /* Shift by element size is architecturally valid.
692 For logical shifts, it is a zeroing operation. */
693 if (a->imm >= (8 << a->esz)) {
694 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
695 } else {
696 return do_zpzi_ool(s, a, fns[a->esz]);
697 }
698}
699
700static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
701{
702 static gen_helper_gvec_3 * const fns[4] = {
703 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
704 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
705 };
706 if (a->esz < 0) {
707 return false;
708 }
709 /* Shift by element size is architecturally valid. For arithmetic
710 right shift for division, it is a zeroing operation. */
711 if (a->imm >= (8 << a->esz)) {
712 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
713 } else {
714 return do_zpzi_ool(s, a, fns[a->esz]);
715 }
716}
717
718/*
719 *** SVE Bitwise Shift - Predicated Group
720 */
721
722#define DO_ZPZW(NAME, name) \
723static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
724{ \
725 static gen_helper_gvec_4 * const fns[3] = { \
726 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
727 gen_helper_sve_##name##_zpzw_s, \
728 }; \
729 if (a->esz < 0 || a->esz >= 3) { \
730 return false; \
731 } \
732 return do_zpzz_ool(s, a, fns[a->esz]); \
733}
734
735DO_ZPZW(ASR, asr)
736DO_ZPZW(LSR, lsr)
737DO_ZPZW(LSL, lsl)
738
739#undef DO_ZPZW
740
741/*
742 *** SVE Bitwise Shift - Unpredicated Group
743 */
744
745static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
746 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
747 int64_t, uint32_t, uint32_t))
748{
749 if (a->esz < 0) {
750 /* Invalid tsz encoding -- see tszimm_esz. */
751 return false;
752 }
753 if (sve_access_check(s)) {
754 unsigned vsz = vec_full_reg_size(s);
755 /* Shift by element size is architecturally valid. For
756 arithmetic right-shift, it's the same as by one less.
757 Otherwise it is a zeroing operation. */
758 if (a->imm >= 8 << a->esz) {
759 if (asr) {
760 a->imm = (8 << a->esz) - 1;
761 } else {
762 do_dupi_z(s, a->rd, 0);
763 return true;
764 }
765 }
766 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
767 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
768 }
769 return true;
770}
771
772static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
773{
774 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
775}
776
777static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
778{
779 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
780}
781
782static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
783{
784 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
785}
786
787static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
788{
789 if (fn == NULL) {
790 return false;
791 }
792 if (sve_access_check(s)) {
793 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
794 }
795 return true;
796}
797
798#define DO_ZZW(NAME, name) \
799static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
800{ \
801 static gen_helper_gvec_3 * const fns[4] = { \
802 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
803 gen_helper_sve_##name##_zzw_s, NULL \
804 }; \
805 return do_zzw_ool(s, a, fns[a->esz]); \
806}
807
808DO_ZZW(ASR, asr)
809DO_ZZW(LSR, lsr)
810DO_ZZW(LSL, lsl)
811
812#undef DO_ZZW
813
814/*
815 *** SVE Integer Multiply-Add Group
816 */
817
818static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
819 gen_helper_gvec_5 *fn)
820{
821 if (sve_access_check(s)) {
822 unsigned vsz = vec_full_reg_size(s);
823 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
824 vec_full_reg_offset(s, a->ra),
825 vec_full_reg_offset(s, a->rn),
826 vec_full_reg_offset(s, a->rm),
827 pred_full_reg_offset(s, a->pg),
828 vsz, vsz, 0, fn);
829 }
830 return true;
831}
832
833#define DO_ZPZZZ(NAME, name) \
834static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
835{ \
836 static gen_helper_gvec_5 * const fns[4] = { \
837 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
838 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
839 }; \
840 return do_zpzzz_ool(s, a, fns[a->esz]); \
841}
842
843DO_ZPZZZ(MLA, mla)
844DO_ZPZZZ(MLS, mls)
845
846#undef DO_ZPZZZ
847
848/*
849 *** SVE Index Generation Group
850 */
851
852static void do_index(DisasContext *s, int esz, int rd,
853 TCGv_i64 start, TCGv_i64 incr)
854{
855 unsigned vsz = vec_full_reg_size(s);
856 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
857 TCGv_ptr t_zd = tcg_temp_new_ptr();
858
859 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
860 if (esz == 3) {
861 gen_helper_sve_index_d(t_zd, start, incr, desc);
862 } else {
863 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
864 static index_fn * const fns[3] = {
865 gen_helper_sve_index_b,
866 gen_helper_sve_index_h,
867 gen_helper_sve_index_s,
868 };
869 TCGv_i32 s32 = tcg_temp_new_i32();
870 TCGv_i32 i32 = tcg_temp_new_i32();
871
872 tcg_gen_extrl_i64_i32(s32, start);
873 tcg_gen_extrl_i64_i32(i32, incr);
874 fns[esz](t_zd, s32, i32, desc);
875
876 tcg_temp_free_i32(s32);
877 tcg_temp_free_i32(i32);
878 }
879 tcg_temp_free_ptr(t_zd);
880 tcg_temp_free_i32(desc);
881}
882
883static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
884{
885 if (sve_access_check(s)) {
886 TCGv_i64 start = tcg_const_i64(a->imm1);
887 TCGv_i64 incr = tcg_const_i64(a->imm2);
888 do_index(s, a->esz, a->rd, start, incr);
889 tcg_temp_free_i64(start);
890 tcg_temp_free_i64(incr);
891 }
892 return true;
893}
894
895static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
896{
897 if (sve_access_check(s)) {
898 TCGv_i64 start = tcg_const_i64(a->imm);
899 TCGv_i64 incr = cpu_reg(s, a->rm);
900 do_index(s, a->esz, a->rd, start, incr);
901 tcg_temp_free_i64(start);
902 }
903 return true;
904}
905
906static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
907{
908 if (sve_access_check(s)) {
909 TCGv_i64 start = cpu_reg(s, a->rn);
910 TCGv_i64 incr = tcg_const_i64(a->imm);
911 do_index(s, a->esz, a->rd, start, incr);
912 tcg_temp_free_i64(incr);
913 }
914 return true;
915}
916
917static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
918{
919 if (sve_access_check(s)) {
920 TCGv_i64 start = cpu_reg(s, a->rn);
921 TCGv_i64 incr = cpu_reg(s, a->rm);
922 do_index(s, a->esz, a->rd, start, incr);
923 }
924 return true;
925}
926
927/*
928 *** SVE Stack Allocation Group
929 */
930
931static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
932{
933 if (sve_access_check(s)) {
934 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
935 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
936 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
937 }
938 return true;
939}
940
941static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
942{
943 if (sve_access_check(s)) {
944 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
945 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
946 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
947 }
948 return true;
949}
950
951static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
952{
953 if (sve_access_check(s)) {
954 TCGv_i64 reg = cpu_reg(s, a->rd);
955 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
956 }
957 return true;
958}
959
960/*
961 *** SVE Compute Vector Address Group
962 */
963
964static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
965{
966 if (sve_access_check(s)) {
967 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
968 }
969 return true;
970}
971
972static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
973{
974 return do_adr(s, a, gen_helper_sve_adr_p32);
975}
976
977static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
978{
979 return do_adr(s, a, gen_helper_sve_adr_p64);
980}
981
982static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
983{
984 return do_adr(s, a, gen_helper_sve_adr_s32);
985}
986
987static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
988{
989 return do_adr(s, a, gen_helper_sve_adr_u32);
990}
991
992/*
993 *** SVE Integer Misc - Unpredicated Group
994 */
995
996static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
997{
998 static gen_helper_gvec_2 * const fns[4] = {
999 NULL,
1000 gen_helper_sve_fexpa_h,
1001 gen_helper_sve_fexpa_s,
1002 gen_helper_sve_fexpa_d,
1003 };
1004 if (a->esz == 0) {
1005 return false;
1006 }
1007 if (sve_access_check(s)) {
1008 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
1009 }
1010 return true;
1011}
1012
1013static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1014{
1015 static gen_helper_gvec_3 * const fns[4] = {
1016 NULL,
1017 gen_helper_sve_ftssel_h,
1018 gen_helper_sve_ftssel_s,
1019 gen_helper_sve_ftssel_d,
1020 };
1021 if (a->esz == 0) {
1022 return false;
1023 }
1024 if (sve_access_check(s)) {
1025 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
1026 }
1027 return true;
1028}
1029
1030/*
1031 *** SVE Predicate Logical Operations Group
1032 */
1033
1034static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1035 const GVecGen4 *gvec_op)
1036{
1037 if (!sve_access_check(s)) {
1038 return true;
1039 }
1040
1041 unsigned psz = pred_gvec_reg_size(s);
1042 int dofs = pred_full_reg_offset(s, a->rd);
1043 int nofs = pred_full_reg_offset(s, a->rn);
1044 int mofs = pred_full_reg_offset(s, a->rm);
1045 int gofs = pred_full_reg_offset(s, a->pg);
1046
1047 if (!a->s) {
1048 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1049 return true;
1050 }
1051
1052 if (psz == 8) {
1053 /* Do the operation and the flags generation in temps. */
1054 TCGv_i64 pd = tcg_temp_new_i64();
1055 TCGv_i64 pn = tcg_temp_new_i64();
1056 TCGv_i64 pm = tcg_temp_new_i64();
1057 TCGv_i64 pg = tcg_temp_new_i64();
1058
1059 tcg_gen_ld_i64(pn, cpu_env, nofs);
1060 tcg_gen_ld_i64(pm, cpu_env, mofs);
1061 tcg_gen_ld_i64(pg, cpu_env, gofs);
1062
1063 gvec_op->fni8(pd, pn, pm, pg);
1064 tcg_gen_st_i64(pd, cpu_env, dofs);
1065
1066 do_predtest1(pd, pg);
1067
1068 tcg_temp_free_i64(pd);
1069 tcg_temp_free_i64(pn);
1070 tcg_temp_free_i64(pm);
1071 tcg_temp_free_i64(pg);
1072 } else {
1073 /* The operation and flags generation is large. The computation
1074 * of the flags depends on the original contents of the guarding
1075 * predicate. If the destination overwrites the guarding predicate,
1076 * then the easiest way to get this right is to save a copy.
1077 */
1078 int tofs = gofs;
1079 if (a->rd == a->pg) {
1080 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1081 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1082 }
1083
1084 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1085 do_predtest(s, dofs, tofs, psz / 8);
1086 }
1087 return true;
1088}
1089
1090static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1091{
1092 tcg_gen_and_i64(pd, pn, pm);
1093 tcg_gen_and_i64(pd, pd, pg);
1094}
1095
1096static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1097 TCGv_vec pm, TCGv_vec pg)
1098{
1099 tcg_gen_and_vec(vece, pd, pn, pm);
1100 tcg_gen_and_vec(vece, pd, pd, pg);
1101}
1102
1103static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1104{
1105 static const GVecGen4 op = {
1106 .fni8 = gen_and_pg_i64,
1107 .fniv = gen_and_pg_vec,
1108 .fno = gen_helper_sve_and_pppp,
1109 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1110 };
1111
1112 if (!a->s) {
1113 if (!sve_access_check(s)) {
1114 return true;
1115 }
1116 if (a->rn == a->rm) {
1117 if (a->pg == a->rn) {
1118 do_mov_p(s, a->rd, a->rn);
1119 } else {
1120 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1121 }
1122 return true;
1123 } else if (a->pg == a->rn || a->pg == a->rm) {
1124 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1125 return true;
1126 }
1127 }
1128 return do_pppp_flags(s, a, &op);
1129}
1130
1131static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1132{
1133 tcg_gen_andc_i64(pd, pn, pm);
1134 tcg_gen_and_i64(pd, pd, pg);
1135}
1136
1137static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1138 TCGv_vec pm, TCGv_vec pg)
1139{
1140 tcg_gen_andc_vec(vece, pd, pn, pm);
1141 tcg_gen_and_vec(vece, pd, pd, pg);
1142}
1143
1144static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1145{
1146 static const GVecGen4 op = {
1147 .fni8 = gen_bic_pg_i64,
1148 .fniv = gen_bic_pg_vec,
1149 .fno = gen_helper_sve_bic_pppp,
1150 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1151 };
1152
1153 if (!a->s && a->pg == a->rn) {
1154 if (sve_access_check(s)) {
1155 gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1156 }
1157 return true;
1158 }
1159 return do_pppp_flags(s, a, &op);
1160}
1161
1162static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1163{
1164 tcg_gen_xor_i64(pd, pn, pm);
1165 tcg_gen_and_i64(pd, pd, pg);
1166}
1167
1168static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1169 TCGv_vec pm, TCGv_vec pg)
1170{
1171 tcg_gen_xor_vec(vece, pd, pn, pm);
1172 tcg_gen_and_vec(vece, pd, pd, pg);
1173}
1174
1175static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1176{
1177 static const GVecGen4 op = {
1178 .fni8 = gen_eor_pg_i64,
1179 .fniv = gen_eor_pg_vec,
1180 .fno = gen_helper_sve_eor_pppp,
1181 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1182 };
1183 return do_pppp_flags(s, a, &op);
1184}
1185
1186static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1187{
1188 if (a->s) {
1189 return false;
1190 }
1191 if (sve_access_check(s)) {
1192 unsigned psz = pred_gvec_reg_size(s);
1193 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1194 pred_full_reg_offset(s, a->pg),
1195 pred_full_reg_offset(s, a->rn),
1196 pred_full_reg_offset(s, a->rm), psz, psz);
1197 }
1198 return true;
1199}
1200
1201static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1202{
1203 tcg_gen_or_i64(pd, pn, pm);
1204 tcg_gen_and_i64(pd, pd, pg);
1205}
1206
1207static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1208 TCGv_vec pm, TCGv_vec pg)
1209{
1210 tcg_gen_or_vec(vece, pd, pn, pm);
1211 tcg_gen_and_vec(vece, pd, pd, pg);
1212}
1213
1214static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1215{
1216 static const GVecGen4 op = {
1217 .fni8 = gen_orr_pg_i64,
1218 .fniv = gen_orr_pg_vec,
1219 .fno = gen_helper_sve_orr_pppp,
1220 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1221 };
1222
1223 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
1224 return do_mov_p(s, a->rd, a->rn);
1225 }
1226 return do_pppp_flags(s, a, &op);
1227}
1228
1229static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1230{
1231 tcg_gen_orc_i64(pd, pn, pm);
1232 tcg_gen_and_i64(pd, pd, pg);
1233}
1234
1235static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1236 TCGv_vec pm, TCGv_vec pg)
1237{
1238 tcg_gen_orc_vec(vece, pd, pn, pm);
1239 tcg_gen_and_vec(vece, pd, pd, pg);
1240}
1241
1242static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1243{
1244 static const GVecGen4 op = {
1245 .fni8 = gen_orn_pg_i64,
1246 .fniv = gen_orn_pg_vec,
1247 .fno = gen_helper_sve_orn_pppp,
1248 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1249 };
1250 return do_pppp_flags(s, a, &op);
1251}
1252
1253static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1254{
1255 tcg_gen_or_i64(pd, pn, pm);
1256 tcg_gen_andc_i64(pd, pg, pd);
1257}
1258
1259static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1260 TCGv_vec pm, TCGv_vec pg)
1261{
1262 tcg_gen_or_vec(vece, pd, pn, pm);
1263 tcg_gen_andc_vec(vece, pd, pg, pd);
1264}
1265
1266static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1267{
1268 static const GVecGen4 op = {
1269 .fni8 = gen_nor_pg_i64,
1270 .fniv = gen_nor_pg_vec,
1271 .fno = gen_helper_sve_nor_pppp,
1272 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1273 };
1274 return do_pppp_flags(s, a, &op);
1275}
1276
1277static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1278{
1279 tcg_gen_and_i64(pd, pn, pm);
1280 tcg_gen_andc_i64(pd, pg, pd);
1281}
1282
1283static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1284 TCGv_vec pm, TCGv_vec pg)
1285{
1286 tcg_gen_and_vec(vece, pd, pn, pm);
1287 tcg_gen_andc_vec(vece, pd, pg, pd);
1288}
1289
1290static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1291{
1292 static const GVecGen4 op = {
1293 .fni8 = gen_nand_pg_i64,
1294 .fniv = gen_nand_pg_vec,
1295 .fno = gen_helper_sve_nand_pppp,
1296 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1297 };
1298 return do_pppp_flags(s, a, &op);
1299}
1300
1301/*
1302 *** SVE Predicate Misc Group
1303 */
1304
1305static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1306{
1307 if (sve_access_check(s)) {
1308 int nofs = pred_full_reg_offset(s, a->rn);
1309 int gofs = pred_full_reg_offset(s, a->pg);
1310 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1311
1312 if (words == 1) {
1313 TCGv_i64 pn = tcg_temp_new_i64();
1314 TCGv_i64 pg = tcg_temp_new_i64();
1315
1316 tcg_gen_ld_i64(pn, cpu_env, nofs);
1317 tcg_gen_ld_i64(pg, cpu_env, gofs);
1318 do_predtest1(pn, pg);
1319
1320 tcg_temp_free_i64(pn);
1321 tcg_temp_free_i64(pg);
1322 } else {
1323 do_predtest(s, nofs, gofs, words);
1324 }
1325 }
1326 return true;
1327}
1328
1329/* See the ARM pseudocode DecodePredCount. */
1330static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1331{
1332 unsigned elements = fullsz >> esz;
1333 unsigned bound;
1334
1335 switch (pattern) {
1336 case 0x0: /* POW2 */
1337 return pow2floor(elements);
1338 case 0x1: /* VL1 */
1339 case 0x2: /* VL2 */
1340 case 0x3: /* VL3 */
1341 case 0x4: /* VL4 */
1342 case 0x5: /* VL5 */
1343 case 0x6: /* VL6 */
1344 case 0x7: /* VL7 */
1345 case 0x8: /* VL8 */
1346 bound = pattern;
1347 break;
1348 case 0x9: /* VL16 */
1349 case 0xa: /* VL32 */
1350 case 0xb: /* VL64 */
1351 case 0xc: /* VL128 */
1352 case 0xd: /* VL256 */
1353 bound = 16 << (pattern - 9);
1354 break;
1355 case 0x1d: /* MUL4 */
1356 return elements - elements % 4;
1357 case 0x1e: /* MUL3 */
1358 return elements - elements % 3;
1359 case 0x1f: /* ALL */
1360 return elements;
1361 default: /* #uimm5 */
1362 return 0;
1363 }
1364 return elements >= bound ? bound : 0;
1365}
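
/*
 * Illustrative example: with a 256-bit vector and esz = MO_32 there are
 * 8 elements, so POW2 and ALL give 8, VL7 gives 7, MUL3 gives 6, and
 * VL16 gives 0 because the bound exceeds the element count.
 */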
1366
1367/* This handles all of the predicate initialization instructions,
1368 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1369 * so that decode_pred_count returns 0. For SETFFR, we will have
1370 * set RD == 16 == FFR.
1371 */
1372static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1373{
1374 if (!sve_access_check(s)) {
1375 return true;
1376 }
1377
1378 unsigned fullsz = vec_full_reg_size(s);
1379 unsigned ofs = pred_full_reg_offset(s, rd);
1380 unsigned numelem, setsz, i;
1381 uint64_t word, lastword;
1382 TCGv_i64 t;
1383
1384 numelem = decode_pred_count(fullsz, pat, esz);
1385
1386 /* Determine what we must store into each bit, and how many. */
1387 if (numelem == 0) {
1388 lastword = word = 0;
1389 setsz = fullsz;
1390 } else {
1391 setsz = numelem << esz;
1392 lastword = word = pred_esz_masks[esz];
1393 if (setsz % 64) {
1394 lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1395 }
1396 }
1397
1398 t = tcg_temp_new_i64();
1399 if (fullsz <= 64) {
1400 tcg_gen_movi_i64(t, lastword);
1401 tcg_gen_st_i64(t, cpu_env, ofs);
1402 goto done;
1403 }
1404
1405 if (word == lastword) {
1406 unsigned maxsz = size_for_gvec(fullsz / 8);
1407 unsigned oprsz = size_for_gvec(setsz / 8);
1408
1409 if (oprsz * 8 == setsz) {
1410 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
1411 goto done;
1412 }
1413 }
1414
1415 setsz /= 8;
1416 fullsz /= 8;
1417
1418 tcg_gen_movi_i64(t, word);
1419 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1420 tcg_gen_st_i64(t, cpu_env, ofs + i);
1421 }
1422 if (lastword != word) {
1423 tcg_gen_movi_i64(t, lastword);
1424 tcg_gen_st_i64(t, cpu_env, ofs + i);
1425 i += 8;
1426 }
1427 if (i < fullsz) {
1428 tcg_gen_movi_i64(t, 0);
1429 for (; i < fullsz; i += 8) {
1430 tcg_gen_st_i64(t, cpu_env, ofs + i);
1431 }
1432 }
1433
1434 done:
1435 tcg_temp_free_i64(t);
1436
1437 /* PTRUES */
1438 if (setflag) {
1439 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1440 tcg_gen_movi_i32(cpu_CF, word == 0);
1441 tcg_gen_movi_i32(cpu_VF, 0);
1442 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1443 }
1444 return true;
1445}
1446
1447static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
1448{
1449 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1450}
1451
1452static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
1453{
1454 /* Note pat == 31 is #all, to set all elements. */
1455 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1456}
1457
1458static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
1459{
1460 /* Note pat == 32 is #unimp, to set no elements. */
1461 return do_predset(s, 0, a->rd, 32, false);
1462}
1463
1464static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1465{
1466 /* The path through do_pppp_flags is complicated enough to want to avoid
1467 * duplication. Frob the arguments into the form of a predicated AND.
1468 */
1469 arg_rprr_s alt_a = {
1470 .rd = a->rd, .pg = a->pg, .s = a->s,
1471 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1472 };
1473 return trans_AND_pppp(s, &alt_a);
1474}
1475
1476static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
1477{
1478 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1479}
1480
1481static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
1482{
1483 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1484}
1485
1486static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1487 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1488 TCGv_ptr, TCGv_i32))
1489{
1490 if (!sve_access_check(s)) {
1491 return true;
1492 }
1493
1494 TCGv_ptr t_pd = tcg_temp_new_ptr();
1495 TCGv_ptr t_pg = tcg_temp_new_ptr();
1496 TCGv_i32 t;
1497 unsigned desc = 0;
1498
1499 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
1500 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
1501
1502 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1503 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1504 t = tcg_const_i32(desc);
1505
1506 gen_fn(t, t_pd, t_pg, t);
1507 tcg_temp_free_ptr(t_pd);
1508 tcg_temp_free_ptr(t_pg);
1509
1510 do_pred_flags(t);
1511 tcg_temp_free_i32(t);
1512 return true;
1513}
1514
1515static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
1516{
1517 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1518}
1519
1520static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
1521{
1522 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1523}
1524
1525/*
1526 *** SVE Element Count Group
1527 */
1528
1529/* Perform an inline saturating addition of a 32-bit value within
1530 * a 64-bit register. The second operand is known to be positive,
1531 * which halves the comparisons we must perform to bound the result.
1532 */
1533static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1534{
1535 int64_t ibound;
1536 TCGv_i64 bound;
1537 TCGCond cond;
1538
1539 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1540 if (u) {
1541 tcg_gen_ext32u_i64(reg, reg);
1542 } else {
1543 tcg_gen_ext32s_i64(reg, reg);
1544 }
1545 if (d) {
1546 tcg_gen_sub_i64(reg, reg, val);
1547 ibound = (u ? 0 : INT32_MIN);
1548 cond = TCG_COND_LT;
1549 } else {
1550 tcg_gen_add_i64(reg, reg, val);
1551 ibound = (u ? UINT32_MAX : INT32_MAX);
1552 cond = TCG_COND_GT;
1553 }
1554 bound = tcg_const_i64(ibound);
1555 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1556 tcg_temp_free_i64(bound);
1557}
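
/*
 * Illustrative example: for an unsigned increment with reg = 0xfffffff0
 * and val = 0x20, the 64-bit sum is 0x1_0000_0010, which exceeds
 * UINT32_MAX, so the movcond clamps the result to 0xffffffff.
 */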
1558
1559/* Similarly with 64-bit values. */
1560static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1561{
1562 TCGv_i64 t0 = tcg_temp_new_i64();
1563 TCGv_i64 t1 = tcg_temp_new_i64();
1564 TCGv_i64 t2;
1565
1566 if (u) {
1567 if (d) {
1568 tcg_gen_sub_i64(t0, reg, val);
1569 tcg_gen_movi_i64(t1, 0);
1570 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1571 } else {
1572 tcg_gen_add_i64(t0, reg, val);
1573 tcg_gen_movi_i64(t1, -1);
1574 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1575 }
1576 } else {
1577 if (d) {
1578 /* Detect signed overflow for subtraction. */
1579 tcg_gen_xor_i64(t0, reg, val);
1580 tcg_gen_sub_i64(t1, reg, val);
1581 tcg_gen_xor_i64(reg, reg, t1);
1582 tcg_gen_and_i64(t0, t0, reg);
1583
1584 /* Bound the result. */
1585 tcg_gen_movi_i64(reg, INT64_MIN);
1586 t2 = tcg_const_i64(0);
1587 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1588 } else {
1589 /* Detect signed overflow for addition. */
1590 tcg_gen_xor_i64(t0, reg, val);
1591 tcg_gen_add_i64(reg, reg, val);
1592 tcg_gen_xor_i64(t1, reg, val);
1593 tcg_gen_andc_i64(t0, t1, t0);
1594
1595 /* Bound the result. */
1596 tcg_gen_movi_i64(t1, INT64_MAX);
1597 t2 = tcg_const_i64(0);
1598 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1599 }
1600 tcg_temp_free_i64(t2);
1601 }
1602 tcg_temp_free_i64(t0);
1603 tcg_temp_free_i64(t1);
1604}
1605
1606/* Similarly with a vector and a scalar operand. */
1607static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1608 TCGv_i64 val, bool u, bool d)
1609{
1610 unsigned vsz = vec_full_reg_size(s);
1611 TCGv_ptr dptr, nptr;
1612 TCGv_i32 t32, desc;
1613 TCGv_i64 t64;
1614
1615 dptr = tcg_temp_new_ptr();
1616 nptr = tcg_temp_new_ptr();
1617 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1618 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1619 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1620
1621 switch (esz) {
1622 case MO_8:
1623 t32 = tcg_temp_new_i32();
1624 tcg_gen_extrl_i64_i32(t32, val);
1625 if (d) {
1626 tcg_gen_neg_i32(t32, t32);
1627 }
1628 if (u) {
1629 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1630 } else {
1631 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1632 }
1633 tcg_temp_free_i32(t32);
1634 break;
1635
1636 case MO_16:
1637 t32 = tcg_temp_new_i32();
1638 tcg_gen_extrl_i64_i32(t32, val);
1639 if (d) {
1640 tcg_gen_neg_i32(t32, t32);
1641 }
1642 if (u) {
1643 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1644 } else {
1645 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1646 }
1647 tcg_temp_free_i32(t32);
1648 break;
1649
1650 case MO_32:
1651 t64 = tcg_temp_new_i64();
1652 if (d) {
1653 tcg_gen_neg_i64(t64, val);
1654 } else {
1655 tcg_gen_mov_i64(t64, val);
1656 }
1657 if (u) {
1658 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1659 } else {
1660 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1661 }
1662 tcg_temp_free_i64(t64);
1663 break;
1664
1665 case MO_64:
1666 if (u) {
1667 if (d) {
1668 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1669 } else {
1670 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1671 }
1672 } else if (d) {
1673 t64 = tcg_temp_new_i64();
1674 tcg_gen_neg_i64(t64, val);
1675 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1676 tcg_temp_free_i64(t64);
1677 } else {
1678 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1679 }
1680 break;
1681
1682 default:
1683 g_assert_not_reached();
1684 }
1685
1686 tcg_temp_free_ptr(dptr);
1687 tcg_temp_free_ptr(nptr);
1688 tcg_temp_free_i32(desc);
1689}
1690
1691static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1692{
1693 if (sve_access_check(s)) {
1694 unsigned fullsz = vec_full_reg_size(s);
1695 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1696 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1697 }
1698 return true;
1699}
1700
1701static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1702{
1703 if (sve_access_check(s)) {
1704 unsigned fullsz = vec_full_reg_size(s);
1705 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1706 int inc = numelem * a->imm * (a->d ? -1 : 1);
1707 TCGv_i64 reg = cpu_reg(s, a->rd);
1708
1709 tcg_gen_addi_i64(reg, reg, inc);
1710 }
1711 return true;
1712}
1713
1714static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1715{
1716 if (!sve_access_check(s)) {
1717 return true;
1718 }
1719
1720 unsigned fullsz = vec_full_reg_size(s);
1721 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1722 int inc = numelem * a->imm;
1723 TCGv_i64 reg = cpu_reg(s, a->rd);
1724
1725 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1726 if (inc == 0) {
1727 if (a->u) {
1728 tcg_gen_ext32u_i64(reg, reg);
1729 } else {
1730 tcg_gen_ext32s_i64(reg, reg);
1731 }
1732 } else {
1733 TCGv_i64 t = tcg_const_i64(inc);
1734 do_sat_addsub_32(reg, t, a->u, a->d);
1735 tcg_temp_free_i64(t);
1736 }
1737 return true;
1738}
1739
1740static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1741{
1742 if (!sve_access_check(s)) {
1743 return true;
1744 }
1745
1746 unsigned fullsz = vec_full_reg_size(s);
1747 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1748 int inc = numelem * a->imm;
1749 TCGv_i64 reg = cpu_reg(s, a->rd);
1750
1751 if (inc != 0) {
1752 TCGv_i64 t = tcg_const_i64(inc);
1753 do_sat_addsub_64(reg, t, a->u, a->d);
1754 tcg_temp_free_i64(t);
1755 }
1756 return true;
1757}
1758
1759static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1760{
1761 if (a->esz == 0) {
1762 return false;
1763 }
1764
1765 unsigned fullsz = vec_full_reg_size(s);
1766 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1767 int inc = numelem * a->imm;
1768
1769 if (inc != 0) {
1770 if (sve_access_check(s)) {
1771 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1772 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1773 vec_full_reg_offset(s, a->rn),
1774 t, fullsz, fullsz);
1775 tcg_temp_free_i64(t);
1776 }
1777 } else {
1778 do_mov_z(s, a->rd, a->rn);
1779 }
1780 return true;
1781}
1782
1783static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1784{
1785 if (a->esz == 0) {
1786 return false;
1787 }
1788
1789 unsigned fullsz = vec_full_reg_size(s);
1790 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1791 int inc = numelem * a->imm;
1792
1793 if (inc != 0) {
1794 if (sve_access_check(s)) {
1795 TCGv_i64 t = tcg_const_i64(inc);
1796 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1797 tcg_temp_free_i64(t);
1798 }
1799 } else {
1800 do_mov_z(s, a->rd, a->rn);
1801 }
1802 return true;
1803}
1804
1805/*
1806 *** SVE Bitwise Immediate Group
1807 */
1808
1809static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1810{
1811 uint64_t imm;
1812 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1813 extract32(a->dbm, 0, 6),
1814 extract32(a->dbm, 6, 6))) {
1815 return false;
1816 }
1817 if (sve_access_check(s)) {
1818 unsigned vsz = vec_full_reg_size(s);
1819 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1820 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1821 }
1822 return true;
1823}
1824
1825static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
1826{
1827 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1828}
1829
1830static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
1831{
1832 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1833}
1834
1835static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
1836{
1837 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1838}
1839
1840static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1841{
1842 uint64_t imm;
1843 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1844 extract32(a->dbm, 0, 6),
1845 extract32(a->dbm, 6, 6))) {
1846 return false;
1847 }
1848 if (sve_access_check(s)) {
1849 do_dupi_z(s, a->rd, imm);
1850 }
1851 return true;
1852}
1853
1854/*
1855 *** SVE Integer Wide Immediate - Predicated Group
1856 */
1857
1858/* Implement all merging copies. This is used for CPY (immediate),
1859 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1860 */
1861static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1862 TCGv_i64 val)
1863{
1864 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1865 static gen_cpy * const fns[4] = {
1866 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1867 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1868 };
1869 unsigned vsz = vec_full_reg_size(s);
1870 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1871 TCGv_ptr t_zd = tcg_temp_new_ptr();
1872 TCGv_ptr t_zn = tcg_temp_new_ptr();
1873 TCGv_ptr t_pg = tcg_temp_new_ptr();
1874
1875 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1876 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1877 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1878
1879 fns[esz](t_zd, t_zn, t_pg, val, desc);
1880
1881 tcg_temp_free_ptr(t_zd);
1882 tcg_temp_free_ptr(t_zn);
1883 tcg_temp_free_ptr(t_pg);
1884 tcg_temp_free_i32(desc);
1885}
1886
1887static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1888{
1889 if (a->esz == 0) {
1890 return false;
1891 }
1892 if (sve_access_check(s)) {
1893 /* Decode the VFP immediate. */
1894 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1895 TCGv_i64 t_imm = tcg_const_i64(imm);
1896 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1897 tcg_temp_free_i64(t_imm);
1898 }
1899 return true;
1900}
1901
1902static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
1903{
1904 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1905 return false;
1906 }
1907 if (sve_access_check(s)) {
1908 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1909 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1910 tcg_temp_free_i64(t_imm);
1911 }
1912 return true;
1913}
1914
1915static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
1916{
1917 static gen_helper_gvec_2i * const fns[4] = {
1918 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1919 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1920 };
1921
1922 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1923 return false;
1924 }
1925 if (sve_access_check(s)) {
1926 unsigned vsz = vec_full_reg_size(s);
1927 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1928 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1929 pred_full_reg_offset(s, a->pg),
1930 t_imm, vsz, vsz, 0, fns[a->esz]);
1931 tcg_temp_free_i64(t_imm);
1932 }
1933 return true;
1934}
1935
1936/*
1937 *** SVE Permute Extract Group
1938 */
1939
1940static bool trans_EXT(DisasContext *s, arg_EXT *a)
1941{
1942 if (!sve_access_check(s)) {
1943 return true;
1944 }
1945
1946 unsigned vsz = vec_full_reg_size(s);
1947 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1948 unsigned n_siz = vsz - n_ofs;
1949 unsigned d = vec_full_reg_offset(s, a->rd);
1950 unsigned n = vec_full_reg_offset(s, a->rn);
1951 unsigned m = vec_full_reg_offset(s, a->rm);
1952
1953 /* Use host vector move insns if we have appropriate sizes
1954 * and no unfortunate overlap.
1955 */
1956 if (m != d
1957 && n_ofs == size_for_gvec(n_ofs)
1958 && n_siz == size_for_gvec(n_siz)
1959 && (d != n || n_siz <= n_ofs)) {
1960 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1961 if (n_ofs != 0) {
1962 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1963 }
1964 } else {
1965 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1966 }
1967 return true;
1968}
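/*
 * Worked example, for illustration: with a 16-byte vector and imm == 3,
 * n_ofs = 3 and n_siz = 13, so the result is bytes [3..15] of Zn
 * followed by bytes [0..2] of Zm, i.e. a byte-wise extract from the
 * concatenation Zm:Zn.
 */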
1969
30562ab7
RH
1970/*
1971 *** SVE Permute - Unpredicated Group
1972 */
1973
3a7be554 1974static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
1975{
1976 if (sve_access_check(s)) {
1977 unsigned vsz = vec_full_reg_size(s);
1978 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1979 vsz, vsz, cpu_reg_sp(s, a->rn));
1980 }
1981 return true;
1982}
1983
3a7be554 1984static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
30562ab7
RH
1985{
1986 if ((a->imm & 0x1f) == 0) {
1987 return false;
1988 }
1989 if (sve_access_check(s)) {
1990 unsigned vsz = vec_full_reg_size(s);
1991 unsigned dofs = vec_full_reg_offset(s, a->rd);
1992 unsigned esz, index;
1993
1994 esz = ctz32(a->imm);
1995 index = a->imm >> (esz + 1);
1996
1997 if ((index << esz) < vsz) {
1998 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
1999 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2000 } else {
7e17d50e
RH
2001 /*
2002 * While dup_mem handles 128-bit elements, dup_imm does not.
2003 * Thankfully element size doesn't matter for splatting zero.
2004 */
2005 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
30562ab7
RH
2006 }
2007 }
2008 return true;
2009}
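/*
 * Worked example of the imm encoding: imm == 0b10110 gives
 * esz = ctz32(imm) = 1 (halfwords) and index = imm >> 2 = 5, so Zn.H[5]
 * is replicated; an index at or beyond the vector length takes the
 * else branch above and zeroes Zd.
 */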
2010
2011static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2012{
2013 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2014 static gen_insr * const fns[4] = {
2015 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2016 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2017 };
2018 unsigned vsz = vec_full_reg_size(s);
2019 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2020 TCGv_ptr t_zd = tcg_temp_new_ptr();
2021 TCGv_ptr t_zn = tcg_temp_new_ptr();
2022
2023 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2024 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2025
2026 fns[a->esz](t_zd, t_zn, val, desc);
2027
2028 tcg_temp_free_ptr(t_zd);
2029 tcg_temp_free_ptr(t_zn);
2030 tcg_temp_free_i32(desc);
2031}
2032
3a7be554 2033static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2034{
2035 if (sve_access_check(s)) {
2036 TCGv_i64 t = tcg_temp_new_i64();
2037 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2038 do_insr_i64(s, a, t);
2039 tcg_temp_free_i64(t);
2040 }
2041 return true;
2042}
2043
3a7be554 2044static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2045{
2046 if (sve_access_check(s)) {
2047 do_insr_i64(s, a, cpu_reg(s, a->rm));
2048 }
2049 return true;
2050}
2051
3a7be554 2052static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
30562ab7
RH
2053{
2054 static gen_helper_gvec_2 * const fns[4] = {
2055 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2056 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2057 };
2058
2059 if (sve_access_check(s)) {
40e32e5a 2060 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
30562ab7
RH
2061 }
2062 return true;
2063}
2064
3a7be554 2065static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2066{
2067 static gen_helper_gvec_3 * const fns[4] = {
2068 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2069 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2070 };
2071
2072 if (sve_access_check(s)) {
e645d1a1 2073 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
30562ab7
RH
2074 }
2075 return true;
2076}
2077
3a7be554 2078static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2079{
2080 static gen_helper_gvec_2 * const fns[4][2] = {
2081 { NULL, NULL },
2082 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2083 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2084 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2085 };
2086
2087 if (a->esz == 0) {
2088 return false;
2089 }
2090 if (sve_access_check(s)) {
2091 unsigned vsz = vec_full_reg_size(s);
2092 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2093 vec_full_reg_offset(s, a->rn)
2094 + (a->h ? vsz / 2 : 0),
2095 vsz, vsz, 0, fns[a->esz][a->u]);
2096 }
2097 return true;
2098}
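/*
 * The a->h bit selects which half of Zn is widened: e.g. for esz == 1,
 * SUNPKLO (h == 0) sign-extends the bytes in the low vsz/2 bytes of Zn
 * into halfword elements of Zd, while SUNPKHI (h == 1) does the same
 * for the high half.
 */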
2099
d731d8cb
RH
2100/*
2101 *** SVE Permute - Predicates Group
2102 */
2103
2104static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2105 gen_helper_gvec_3 *fn)
2106{
2107 if (!sve_access_check(s)) {
2108 return true;
2109 }
2110
2111 unsigned vsz = pred_full_reg_size(s);
2112
d731d8cb
RH
2113 TCGv_ptr t_d = tcg_temp_new_ptr();
2114 TCGv_ptr t_n = tcg_temp_new_ptr();
2115 TCGv_ptr t_m = tcg_temp_new_ptr();
2116 TCGv_i32 t_desc;
f9b0fcce 2117 uint32_t desc = 0;
d731d8cb 2118
f9b0fcce
RH
2119 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2120 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2121 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2122
2123 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2124 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2125 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2126 t_desc = tcg_const_i32(desc);
2127
2128 fn(t_d, t_n, t_m, t_desc);
2129
2130 tcg_temp_free_ptr(t_d);
2131 tcg_temp_free_ptr(t_n);
2132 tcg_temp_free_ptr(t_m);
2133 tcg_temp_free_i32(t_desc);
2134 return true;
2135}
2136
2137static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2138 gen_helper_gvec_2 *fn)
2139{
2140 if (!sve_access_check(s)) {
2141 return true;
2142 }
2143
2144 unsigned vsz = pred_full_reg_size(s);
2145 TCGv_ptr t_d = tcg_temp_new_ptr();
2146 TCGv_ptr t_n = tcg_temp_new_ptr();
2147 TCGv_i32 t_desc;
70acaafe 2148 uint32_t desc = 0;
d731d8cb
RH
2149
2150 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2151 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2152
70acaafe
RH
2153 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2154 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2155 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2156 t_desc = tcg_const_i32(desc);
2157
2158 fn(t_d, t_n, t_desc);
2159
2160 tcg_temp_free_i32(t_desc);
2161 tcg_temp_free_ptr(t_d);
2162 tcg_temp_free_ptr(t_n);
2163 return true;
2164}
2165
3a7be554 2166static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2167{
2168 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2169}
2170
3a7be554 2171static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2172{
2173 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2174}
2175
3a7be554 2176static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2177{
2178 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2179}
2180
3a7be554 2181static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2182{
2183 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2184}
2185
3a7be554 2186static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2187{
2188 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2189}
2190
3a7be554 2191static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2192{
2193 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2194}
2195
3a7be554 2196static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
d731d8cb
RH
2197{
2198 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2199}
2200
3a7be554 2201static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
d731d8cb
RH
2202{
2203 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2204}
2205
3a7be554 2206static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
d731d8cb
RH
2207{
2208 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2209}
2210
234b48e9
RH
2211/*
2212 *** SVE Permute - Interleaving Group
2213 */
2214
2215static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2216{
2217 static gen_helper_gvec_3 * const fns[4] = {
2218 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2219 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2220 };
2221
2222 if (sve_access_check(s)) {
2223 unsigned vsz = vec_full_reg_size(s);
2224 unsigned high_ofs = high ? vsz / 2 : 0;
2225 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2226 vec_full_reg_offset(s, a->rn) + high_ofs,
2227 vec_full_reg_offset(s, a->rm) + high_ofs,
2228 vsz, vsz, 0, fns[a->esz]);
2229 }
2230 return true;
2231}
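/*
 * Interleave example: for byte elements, ZIP1 produces
 * Zd = { Zn[0], Zm[0], Zn[1], Zm[1], ... } from the low halves of the
 * sources, while ZIP2 (high == true) starts from byte offset vsz / 2.
 */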
2232
2233static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2234 gen_helper_gvec_3 *fn)
2235{
2236 if (sve_access_check(s)) {
e645d1a1 2237 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
234b48e9
RH
2238 }
2239 return true;
2240}
2241
3a7be554 2242static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2243{
2244 return do_zip(s, a, false);
2245}
2246
3a7be554 2247static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2248{
2249 return do_zip(s, a, true);
2250}
2251
2252static gen_helper_gvec_3 * const uzp_fns[4] = {
2253 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2254 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2255};
2256
3a7be554 2257static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2258{
2259 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2260}
2261
3a7be554 2262static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2263{
2264 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2265}
2266
2267static gen_helper_gvec_3 * const trn_fns[4] = {
2268 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2269 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2270};
2271
3a7be554 2272static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2273{
2274 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2275}
2276
3a7be554 2277static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2278{
2279 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2280}
2281
3ca879ae
RH
2282/*
2283 *** SVE Permute Vector - Predicated Group
2284 */
2285
3a7be554 2286static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
3ca879ae
RH
2287{
2288 static gen_helper_gvec_3 * const fns[4] = {
2289 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2290 };
2291 return do_zpz_ool(s, a, fns[a->esz]);
2292}
2293
ef23cb72
RH
2294/* Call the helper that computes the ARM LastActiveElement pseudocode
2295 * function, scaled by the element size. This includes the not found
2296 * indication; e.g. not found for esz=3 is -8.
2297 */
2298static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2299{
2300 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2301 * round up, as we do elsewhere, because we need the exact size.
2302 */
2303 TCGv_ptr t_p = tcg_temp_new_ptr();
2304 TCGv_i32 t_desc;
2305 unsigned vsz = pred_full_reg_size(s);
2306 unsigned desc;
2307
2308 desc = vsz - 2;
2309 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2310
2311 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2312 t_desc = tcg_const_i32(desc);
2313
2314 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2315
2316 tcg_temp_free_i32(t_desc);
2317 tcg_temp_free_ptr(t_p);
2318}
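/*
 * Example of the scaling: with esz == 2 and the last active element at
 * index 3, the helper returns 3 << 2 == 12, a byte offset directly
 * usable for addressing into the Zreg; "not found" is -(1 << esz).
 */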
2319
2320/* Increment LAST to the offset of the next element in the vector,
2321 * wrapping around to 0.
2322 */
2323static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2324{
2325 unsigned vsz = vec_full_reg_size(s);
2326
2327 tcg_gen_addi_i32(last, last, 1 << esz);
2328 if (is_power_of_2(vsz)) {
2329 tcg_gen_andi_i32(last, last, vsz - 1);
2330 } else {
2331 TCGv_i32 max = tcg_const_i32(vsz);
2332 TCGv_i32 zero = tcg_const_i32(0);
2333 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2334 tcg_temp_free_i32(max);
2335 tcg_temp_free_i32(zero);
2336 }
2337}
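/*
 * Wrap example: vector lengths are multiples of 16 bytes but need not
 * be powers of two.  With vsz == 48 and esz == 3, an offset of 40
 * increments to 48 and the movcond resets it to 0; for power-of-two
 * vsz the andi mask gives the same wrap for free.
 */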
2338
2339/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2340static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2341{
2342 unsigned vsz = vec_full_reg_size(s);
2343
2344 if (is_power_of_2(vsz)) {
2345 tcg_gen_andi_i32(last, last, vsz - 1);
2346 } else {
2347 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2348 TCGv_i32 zero = tcg_const_i32(0);
2349 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2350 tcg_temp_free_i32(max);
2351 tcg_temp_free_i32(zero);
2352 }
2353}
2354
2355/* Load an unsigned element of ESZ from BASE+OFS. */
2356static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2357{
2358 TCGv_i64 r = tcg_temp_new_i64();
2359
2360 switch (esz) {
2361 case 0:
2362 tcg_gen_ld8u_i64(r, base, ofs);
2363 break;
2364 case 1:
2365 tcg_gen_ld16u_i64(r, base, ofs);
2366 break;
2367 case 2:
2368 tcg_gen_ld32u_i64(r, base, ofs);
2369 break;
2370 case 3:
2371 tcg_gen_ld_i64(r, base, ofs);
2372 break;
2373 default:
2374 g_assert_not_reached();
2375 }
2376 return r;
2377}
2378
2379/* Load an unsigned element of ESZ from RM[LAST]. */
2380static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2381 int rm, int esz)
2382{
2383 TCGv_ptr p = tcg_temp_new_ptr();
2384 TCGv_i64 r;
2385
 2386    /* Convert the offset into the vector into an offset into ENV.
2387 * The final adjustment for the vector register base
2388 * is added via constant offset to the load.
2389 */
2390#ifdef HOST_WORDS_BIGENDIAN
2391 /* Adjust for element ordering. See vec_reg_offset. */
2392 if (esz < 3) {
2393 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2394 }
2395#endif
2396 tcg_gen_ext_i32_ptr(p, last);
2397 tcg_gen_add_ptr(p, p, cpu_env);
2398
2399 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2400 tcg_temp_free_ptr(p);
2401
2402 return r;
2403}
2404
2405/* Compute CLAST for a Zreg. */
2406static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2407{
2408 TCGv_i32 last;
2409 TCGLabel *over;
2410 TCGv_i64 ele;
2411 unsigned vsz, esz = a->esz;
2412
2413 if (!sve_access_check(s)) {
2414 return true;
2415 }
2416
2417 last = tcg_temp_local_new_i32();
2418 over = gen_new_label();
2419
2420 find_last_active(s, last, esz, a->pg);
2421
2422 /* There is of course no movcond for a 2048-bit vector,
2423 * so we must branch over the actual store.
2424 */
2425 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2426
2427 if (!before) {
2428 incr_last_active(s, last, esz);
2429 }
2430
2431 ele = load_last_active(s, last, a->rm, esz);
2432 tcg_temp_free_i32(last);
2433
2434 vsz = vec_full_reg_size(s);
2435 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2436 tcg_temp_free_i64(ele);
2437
2438 /* If this insn used MOVPRFX, we may need a second move. */
2439 if (a->rd != a->rn) {
2440 TCGLabel *done = gen_new_label();
2441 tcg_gen_br(done);
2442
2443 gen_set_label(over);
2444 do_mov_z(s, a->rd, a->rn);
2445
2446 gen_set_label(done);
2447 } else {
2448 gen_set_label(over);
2449 }
2450 return true;
2451}
2452
3a7be554 2453static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2454{
2455 return do_clast_vector(s, a, false);
2456}
2457
3a7be554 2458static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2459{
2460 return do_clast_vector(s, a, true);
2461}
2462
2463/* Compute CLAST for a scalar. */
2464static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2465 bool before, TCGv_i64 reg_val)
2466{
2467 TCGv_i32 last = tcg_temp_new_i32();
2468 TCGv_i64 ele, cmp, zero;
2469
2470 find_last_active(s, last, esz, pg);
2471
2472 /* Extend the original value of last prior to incrementing. */
2473 cmp = tcg_temp_new_i64();
2474 tcg_gen_ext_i32_i64(cmp, last);
2475
2476 if (!before) {
2477 incr_last_active(s, last, esz);
2478 }
2479
2480 /* The conceit here is that while last < 0 indicates not found, after
2481 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2482 * from which we can load garbage. We then discard the garbage with
2483 * a conditional move.
2484 */
2485 ele = load_last_active(s, last, rm, esz);
2486 tcg_temp_free_i32(last);
2487
2488 zero = tcg_const_i64(0);
2489 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2490
2491 tcg_temp_free_i64(zero);
2492 tcg_temp_free_i64(cmp);
2493 tcg_temp_free_i64(ele);
2494}
2495
2496/* Compute CLAST for a Vreg. */
2497static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2498{
2499 if (sve_access_check(s)) {
2500 int esz = a->esz;
2501 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2502 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2503
2504 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2505 write_fp_dreg(s, a->rd, reg);
2506 tcg_temp_free_i64(reg);
2507 }
2508 return true;
2509}
2510
3a7be554 2511static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2512{
2513 return do_clast_fp(s, a, false);
2514}
2515
3a7be554 2516static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2517{
2518 return do_clast_fp(s, a, true);
2519}
2520
2521/* Compute CLAST for a Xreg. */
2522static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2523{
2524 TCGv_i64 reg;
2525
2526 if (!sve_access_check(s)) {
2527 return true;
2528 }
2529
2530 reg = cpu_reg(s, a->rd);
2531 switch (a->esz) {
2532 case 0:
2533 tcg_gen_ext8u_i64(reg, reg);
2534 break;
2535 case 1:
2536 tcg_gen_ext16u_i64(reg, reg);
2537 break;
2538 case 2:
2539 tcg_gen_ext32u_i64(reg, reg);
2540 break;
2541 case 3:
2542 break;
2543 default:
2544 g_assert_not_reached();
2545 }
2546
2547 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2548 return true;
2549}
2550
3a7be554 2551static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2552{
2553 return do_clast_general(s, a, false);
2554}
2555
3a7be554 2556static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2557{
2558 return do_clast_general(s, a, true);
2559}
2560
2561/* Compute LAST for a scalar. */
2562static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2563 int pg, int rm, bool before)
2564{
2565 TCGv_i32 last = tcg_temp_new_i32();
2566 TCGv_i64 ret;
2567
2568 find_last_active(s, last, esz, pg);
2569 if (before) {
2570 wrap_last_active(s, last, esz);
2571 } else {
2572 incr_last_active(s, last, esz);
2573 }
2574
2575 ret = load_last_active(s, last, rm, esz);
2576 tcg_temp_free_i32(last);
2577 return ret;
2578}
2579
2580/* Compute LAST for a Vreg. */
2581static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2582{
2583 if (sve_access_check(s)) {
2584 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2585 write_fp_dreg(s, a->rd, val);
2586 tcg_temp_free_i64(val);
2587 }
2588 return true;
2589}
2590
3a7be554 2591static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2592{
2593 return do_last_fp(s, a, false);
2594}
2595
3a7be554 2596static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2597{
2598 return do_last_fp(s, a, true);
2599}
2600
2601/* Compute LAST for a Xreg. */
2602static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2603{
2604 if (sve_access_check(s)) {
2605 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2606 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2607 tcg_temp_free_i64(val);
2608 }
2609 return true;
2610}
2611
3a7be554 2612static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2613{
2614 return do_last_general(s, a, false);
2615}
2616
3a7be554 2617static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2618{
2619 return do_last_general(s, a, true);
2620}
2621
3a7be554 2622static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2623{
2624 if (sve_access_check(s)) {
2625 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2626 }
2627 return true;
2628}
2629
3a7be554 2630static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2631{
2632 if (sve_access_check(s)) {
2633 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2634 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2635 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2636 tcg_temp_free_i64(t);
2637 }
2638 return true;
2639}
2640
3a7be554 2641static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2642{
2643 static gen_helper_gvec_3 * const fns[4] = {
2644 NULL,
2645 gen_helper_sve_revb_h,
2646 gen_helper_sve_revb_s,
2647 gen_helper_sve_revb_d,
2648 };
2649 return do_zpz_ool(s, a, fns[a->esz]);
2650}
2651
3a7be554 2652static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2653{
2654 static gen_helper_gvec_3 * const fns[4] = {
2655 NULL,
2656 NULL,
2657 gen_helper_sve_revh_s,
2658 gen_helper_sve_revh_d,
2659 };
2660 return do_zpz_ool(s, a, fns[a->esz]);
2661}
2662
3a7be554 2663static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2664{
2665 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2666}
2667
3a7be554 2668static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2669{
2670 static gen_helper_gvec_3 * const fns[4] = {
2671 gen_helper_sve_rbit_b,
2672 gen_helper_sve_rbit_h,
2673 gen_helper_sve_rbit_s,
2674 gen_helper_sve_rbit_d,
2675 };
2676 return do_zpz_ool(s, a, fns[a->esz]);
2677}
2678
3a7be554 2679static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
b48ff240
RH
2680{
2681 if (sve_access_check(s)) {
36cbb7a8 2682 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
dd701faf 2683 a->rd, a->rn, a->rm, a->pg, a->esz);
b48ff240
RH
2684 }
2685 return true;
2686}
2687
757f9cff
RH
2688/*
2689 *** SVE Integer Compare - Vectors Group
2690 */
2691
2692static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2693 gen_helper_gvec_flags_4 *gen_fn)
2694{
2695 TCGv_ptr pd, zn, zm, pg;
2696 unsigned vsz;
2697 TCGv_i32 t;
2698
2699 if (gen_fn == NULL) {
2700 return false;
2701 }
2702 if (!sve_access_check(s)) {
2703 return true;
2704 }
2705
2706 vsz = vec_full_reg_size(s);
2707 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2708 pd = tcg_temp_new_ptr();
2709 zn = tcg_temp_new_ptr();
2710 zm = tcg_temp_new_ptr();
2711 pg = tcg_temp_new_ptr();
2712
2713 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2714 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2715 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2716 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2717
2718 gen_fn(t, pd, zn, zm, pg, t);
2719
2720 tcg_temp_free_ptr(pd);
2721 tcg_temp_free_ptr(zn);
2722 tcg_temp_free_ptr(zm);
2723 tcg_temp_free_ptr(pg);
2724
2725 do_pred_flags(t);
2726
2727 tcg_temp_free_i32(t);
2728 return true;
2729}
2730
2731#define DO_PPZZ(NAME, name) \
3a7be554 2732static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2733{ \
2734 static gen_helper_gvec_flags_4 * const fns[4] = { \
2735 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2736 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2737 }; \
2738 return do_ppzz_flags(s, a, fns[a->esz]); \
2739}
2740
2741DO_PPZZ(CMPEQ, cmpeq)
2742DO_PPZZ(CMPNE, cmpne)
2743DO_PPZZ(CMPGT, cmpgt)
2744DO_PPZZ(CMPGE, cmpge)
2745DO_PPZZ(CMPHI, cmphi)
2746DO_PPZZ(CMPHS, cmphs)
2747
2748#undef DO_PPZZ
2749
2750#define DO_PPZW(NAME, name) \
3a7be554 2751static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2752{ \
2753 static gen_helper_gvec_flags_4 * const fns[4] = { \
2754 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2755 gen_helper_sve_##name##_ppzw_s, NULL \
2756 }; \
2757 return do_ppzz_flags(s, a, fns[a->esz]); \
2758}
2759
2760DO_PPZW(CMPEQ, cmpeq)
2761DO_PPZW(CMPNE, cmpne)
2762DO_PPZW(CMPGT, cmpgt)
2763DO_PPZW(CMPGE, cmpge)
2764DO_PPZW(CMPHI, cmphi)
2765DO_PPZW(CMPHS, cmphs)
2766DO_PPZW(CMPLT, cmplt)
2767DO_PPZW(CMPLE, cmple)
2768DO_PPZW(CMPLO, cmplo)
2769DO_PPZW(CMPLS, cmpls)
2770
2771#undef DO_PPZW
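/*
 * The "wide elements" forms compare each element of Zn against the
 * 64-bit element of Zm occupying the same doubleword of the vector;
 * there is no .D variant in the tables above since it would
 * effectively be the ordinary ppzz compare.
 */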
2772
38cadeba
RH
2773/*
2774 *** SVE Integer Compare - Immediate Groups
2775 */
2776
2777static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2778 gen_helper_gvec_flags_3 *gen_fn)
2779{
2780 TCGv_ptr pd, zn, pg;
2781 unsigned vsz;
2782 TCGv_i32 t;
2783
2784 if (gen_fn == NULL) {
2785 return false;
2786 }
2787 if (!sve_access_check(s)) {
2788 return true;
2789 }
2790
2791 vsz = vec_full_reg_size(s);
2792 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2793 pd = tcg_temp_new_ptr();
2794 zn = tcg_temp_new_ptr();
2795 pg = tcg_temp_new_ptr();
2796
2797 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2798 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2799 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2800
2801 gen_fn(t, pd, zn, pg, t);
2802
2803 tcg_temp_free_ptr(pd);
2804 tcg_temp_free_ptr(zn);
2805 tcg_temp_free_ptr(pg);
2806
2807 do_pred_flags(t);
2808
2809 tcg_temp_free_i32(t);
2810 return true;
2811}
2812
2813#define DO_PPZI(NAME, name) \
3a7be554 2814static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
38cadeba
RH
2815{ \
2816 static gen_helper_gvec_flags_3 * const fns[4] = { \
2817 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2818 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2819 }; \
2820 return do_ppzi_flags(s, a, fns[a->esz]); \
2821}
2822
2823DO_PPZI(CMPEQ, cmpeq)
2824DO_PPZI(CMPNE, cmpne)
2825DO_PPZI(CMPGT, cmpgt)
2826DO_PPZI(CMPGE, cmpge)
2827DO_PPZI(CMPHI, cmphi)
2828DO_PPZI(CMPHS, cmphs)
2829DO_PPZI(CMPLT, cmplt)
2830DO_PPZI(CMPLE, cmple)
2831DO_PPZI(CMPLO, cmplo)
2832DO_PPZI(CMPLS, cmpls)
2833
2834#undef DO_PPZI
2835
35da316f
RH
2836/*
2837 *** SVE Partition Break Group
2838 */
2839
2840static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2841 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2842{
2843 if (!sve_access_check(s)) {
2844 return true;
2845 }
2846
2847 unsigned vsz = pred_full_reg_size(s);
2848
2849 /* Predicate sizes may be smaller and cannot use simd_desc. */
2850 TCGv_ptr d = tcg_temp_new_ptr();
2851 TCGv_ptr n = tcg_temp_new_ptr();
2852 TCGv_ptr m = tcg_temp_new_ptr();
2853 TCGv_ptr g = tcg_temp_new_ptr();
2854 TCGv_i32 t = tcg_const_i32(vsz - 2);
2855
2856 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2857 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2858 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2859 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2860
2861 if (a->s) {
2862 fn_s(t, d, n, m, g, t);
2863 do_pred_flags(t);
2864 } else {
2865 fn(d, n, m, g, t);
2866 }
2867 tcg_temp_free_ptr(d);
2868 tcg_temp_free_ptr(n);
2869 tcg_temp_free_ptr(m);
2870 tcg_temp_free_ptr(g);
2871 tcg_temp_free_i32(t);
2872 return true;
2873}
2874
2875static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2876 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2877{
2878 if (!sve_access_check(s)) {
2879 return true;
2880 }
2881
2882 unsigned vsz = pred_full_reg_size(s);
2883
2884 /* Predicate sizes may be smaller and cannot use simd_desc. */
2885 TCGv_ptr d = tcg_temp_new_ptr();
2886 TCGv_ptr n = tcg_temp_new_ptr();
2887 TCGv_ptr g = tcg_temp_new_ptr();
2888 TCGv_i32 t = tcg_const_i32(vsz - 2);
2889
2890 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2891 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2892 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2893
2894 if (a->s) {
2895 fn_s(t, d, n, g, t);
2896 do_pred_flags(t);
2897 } else {
2898 fn(d, n, g, t);
2899 }
2900 tcg_temp_free_ptr(d);
2901 tcg_temp_free_ptr(n);
2902 tcg_temp_free_ptr(g);
2903 tcg_temp_free_i32(t);
2904 return true;
2905}
2906
3a7be554 2907static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
35da316f
RH
2908{
2909 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2910}
2911
3a7be554 2912static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
35da316f
RH
2913{
2914 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2915}
2916
3a7be554 2917static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2918{
2919 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2920}
2921
3a7be554 2922static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2923{
2924 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2925}
2926
3a7be554 2927static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2928{
2929 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2930}
2931
3a7be554 2932static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2933{
2934 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2935}
2936
3a7be554 2937static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2938{
2939 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2940}
2941
9ee3a611
RH
2942/*
2943 *** SVE Predicate Count Group
2944 */
2945
2946static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2947{
2948 unsigned psz = pred_full_reg_size(s);
2949
2950 if (psz <= 8) {
2951 uint64_t psz_mask;
2952
2953 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2954 if (pn != pg) {
2955 TCGv_i64 g = tcg_temp_new_i64();
2956 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2957 tcg_gen_and_i64(val, val, g);
2958 tcg_temp_free_i64(g);
2959 }
2960
2961 /* Reduce the pred_esz_masks value simply to reduce the
2962 * size of the code generated here.
2963 */
2964 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2965 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2966
2967 tcg_gen_ctpop_i64(val, val);
2968 } else {
2969 TCGv_ptr t_pn = tcg_temp_new_ptr();
2970 TCGv_ptr t_pg = tcg_temp_new_ptr();
2971 unsigned desc;
2972 TCGv_i32 t_desc;
2973
2974 desc = psz - 2;
2975 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2976
2977 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
2978 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2979 t_desc = tcg_const_i32(desc);
2980
2981 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
2982 tcg_temp_free_ptr(t_pn);
2983 tcg_temp_free_ptr(t_pg);
2984 tcg_temp_free_i32(t_desc);
2985 }
2986}
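/*
 * Fast-path example: for a 128-bit vector the predicate is only 2
 * bytes, so the whole register fits in a single i64 load.  With
 * esz == 2 only every fourth predicate bit governs an element, so
 * masking with pred_esz_masks[2] (0x1111...) before ctpop counts just
 * the active word elements.
 */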
2987
3a7be554 2988static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
2989{
2990 if (sve_access_check(s)) {
2991 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
2992 }
2993 return true;
2994}
2995
3a7be554 2996static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
2997{
2998 if (sve_access_check(s)) {
2999 TCGv_i64 reg = cpu_reg(s, a->rd);
3000 TCGv_i64 val = tcg_temp_new_i64();
3001
3002 do_cntp(s, val, a->esz, a->pg, a->pg);
3003 if (a->d) {
3004 tcg_gen_sub_i64(reg, reg, val);
3005 } else {
3006 tcg_gen_add_i64(reg, reg, val);
3007 }
3008 tcg_temp_free_i64(val);
3009 }
3010 return true;
3011}
3012
3a7be554 3013static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3014{
3015 if (a->esz == 0) {
3016 return false;
3017 }
3018 if (sve_access_check(s)) {
3019 unsigned vsz = vec_full_reg_size(s);
3020 TCGv_i64 val = tcg_temp_new_i64();
3021 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3022
3023 do_cntp(s, val, a->esz, a->pg, a->pg);
3024 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3025 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3026 }
3027 return true;
3028}
3029
3a7be554 3030static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3031{
3032 if (sve_access_check(s)) {
3033 TCGv_i64 reg = cpu_reg(s, a->rd);
3034 TCGv_i64 val = tcg_temp_new_i64();
3035
3036 do_cntp(s, val, a->esz, a->pg, a->pg);
3037 do_sat_addsub_32(reg, val, a->u, a->d);
3038 }
3039 return true;
3040}
3041
3a7be554 3042static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3043{
3044 if (sve_access_check(s)) {
3045 TCGv_i64 reg = cpu_reg(s, a->rd);
3046 TCGv_i64 val = tcg_temp_new_i64();
3047
3048 do_cntp(s, val, a->esz, a->pg, a->pg);
3049 do_sat_addsub_64(reg, val, a->u, a->d);
3050 }
3051 return true;
3052}
3053
3a7be554 3054static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3055{
3056 if (a->esz == 0) {
3057 return false;
3058 }
3059 if (sve_access_check(s)) {
3060 TCGv_i64 val = tcg_temp_new_i64();
3061 do_cntp(s, val, a->esz, a->pg, a->pg);
3062 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3063 }
3064 return true;
3065}
3066
caf1cefc
RH
3067/*
3068 *** SVE Integer Compare Scalars Group
3069 */
3070
3a7be554 3071static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3072{
3073 if (!sve_access_check(s)) {
3074 return true;
3075 }
3076
3077 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3078 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3079 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3080 TCGv_i64 cmp = tcg_temp_new_i64();
3081
3082 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3083 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3084 tcg_temp_free_i64(cmp);
3085
3086 /* VF = !NF & !CF. */
3087 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3088 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3089
3090 /* Both NF and VF actually look at bit 31. */
3091 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3092 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3093 return true;
3094}
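/*
 * Note: CTERMEQ/CTERMNE report a loop-termination condition through
 * NZCV for a following conditional branch: NF holds the comparison
 * result, VF is !NF & !CF using the current CF (Z and C are not
 * written), and both are negated because the flag machinery tests
 * bit 31 rather than bit 0.
 */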
3095
3a7be554 3096static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
caf1cefc 3097{
bbd0968c 3098 TCGv_i64 op0, op1, t0, t1, tmax;
caf1cefc
RH
3099 TCGv_i32 t2, t3;
3100 TCGv_ptr ptr;
3101 unsigned desc, vsz = vec_full_reg_size(s);
3102 TCGCond cond;
3103
bbd0968c
RH
3104 if (!sve_access_check(s)) {
3105 return true;
3106 }
3107
3108 op0 = read_cpu_reg(s, a->rn, 1);
3109 op1 = read_cpu_reg(s, a->rm, 1);
3110
caf1cefc
RH
3111 if (!a->sf) {
3112 if (a->u) {
3113 tcg_gen_ext32u_i64(op0, op0);
3114 tcg_gen_ext32u_i64(op1, op1);
3115 } else {
3116 tcg_gen_ext32s_i64(op0, op0);
3117 tcg_gen_ext32s_i64(op1, op1);
3118 }
3119 }
3120
3121 /* For the helper, compress the different conditions into a computation
 3122     * of the number of iterations for which the condition is true.
caf1cefc 3123 */
bbd0968c
RH
3124 t0 = tcg_temp_new_i64();
3125 t1 = tcg_temp_new_i64();
caf1cefc
RH
3126 tcg_gen_sub_i64(t0, op1, op0);
3127
bbd0968c 3128 tmax = tcg_const_i64(vsz >> a->esz);
caf1cefc
RH
3129 if (a->eq) {
3130 /* Equality means one more iteration. */
3131 tcg_gen_addi_i64(t0, t0, 1);
bbd0968c
RH
3132
 3133    /* If op1 is the maximum (un)signed integer (the only case in which the
 3134     * addition above could overflow), then we produce an all-true predicate by
3135 * setting the count to the vector length. This is because the
3136 * pseudocode is described as an increment + compare loop, and the
3137 * max integer would always compare true.
3138 */
3139 tcg_gen_movi_i64(t1, (a->sf
3140 ? (a->u ? UINT64_MAX : INT64_MAX)
3141 : (a->u ? UINT32_MAX : INT32_MAX)));
3142 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
caf1cefc
RH
3143 }
3144
bbd0968c
RH
3145 /* Bound to the maximum. */
3146 tcg_gen_umin_i64(t0, t0, tmax);
3147 tcg_temp_free_i64(tmax);
3148
3149 /* Set the count to zero if the condition is false. */
caf1cefc
RH
3150 cond = (a->u
3151 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3152 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3153 tcg_gen_movi_i64(t1, 0);
3154 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
bbd0968c 3155 tcg_temp_free_i64(t1);
caf1cefc 3156
bbd0968c 3157 /* Since we're bounded, pass as a 32-bit type. */
caf1cefc
RH
3158 t2 = tcg_temp_new_i32();
3159 tcg_gen_extrl_i64_i32(t2, t0);
3160 tcg_temp_free_i64(t0);
bbd0968c
RH
3161
3162 /* Scale elements to bits. */
3163 tcg_gen_shli_i32(t2, t2, a->esz);
caf1cefc
RH
3164
3165 desc = (vsz / 8) - 2;
3166 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3167 t3 = tcg_const_i32(desc);
3168
3169 ptr = tcg_temp_new_ptr();
3170 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3171
3172 gen_helper_sve_while(t2, ptr, t2, t3);
3173 do_pred_flags(t2);
3174
3175 tcg_temp_free_ptr(ptr);
3176 tcg_temp_free_i32(t2);
3177 tcg_temp_free_i32(t3);
3178 return true;
3179}
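/*
 * Worked example of the count computation: WHILELT (!u, !eq) with
 * op0 == 5, op1 == 9 and esz == 0 yields t0 = 4, bounded by tmax and
 * not zeroed since 5 < 9; the helper then sets the first four
 * predicate bits and do_pred_flags derives NZCV from the result.
 */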
3180
ed491961
RH
3181/*
3182 *** SVE Integer Wide Immediate - Unpredicated Group
3183 */
3184
3a7be554 3185static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3186{
3187 if (a->esz == 0) {
3188 return false;
3189 }
3190 if (sve_access_check(s)) {
3191 unsigned vsz = vec_full_reg_size(s);
3192 int dofs = vec_full_reg_offset(s, a->rd);
3193 uint64_t imm;
3194
3195 /* Decode the VFP immediate. */
3196 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3197 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3198 }
3199 return true;
3200}
3201
3a7be554 3202static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3203{
3a7be554 3204 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
ed491961
RH
3205 return false;
3206 }
3207 if (sve_access_check(s)) {
3208 unsigned vsz = vec_full_reg_size(s);
3209 int dofs = vec_full_reg_offset(s, a->rd);
3210
8711e71f 3211 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3212 }
3213 return true;
3214}
3215
3a7be554 3216static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3217{
3a7be554 3218 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3219 return false;
3220 }
3221 if (sve_access_check(s)) {
3222 unsigned vsz = vec_full_reg_size(s);
3223 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3224 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3225 }
3226 return true;
3227}
3228
3a7be554 3229static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3230{
3231 a->imm = -a->imm;
3a7be554 3232 return trans_ADD_zzi(s, a);
6e6a157d
RH
3233}
3234
3a7be554 3235static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3236{
53229a77 3237 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
6e6a157d
RH
3238 static const GVecGen2s op[4] = {
3239 { .fni8 = tcg_gen_vec_sub8_i64,
3240 .fniv = tcg_gen_sub_vec,
3241 .fno = gen_helper_sve_subri_b,
53229a77 3242 .opt_opc = vecop_list,
6e6a157d
RH
3243 .vece = MO_8,
3244 .scalar_first = true },
3245 { .fni8 = tcg_gen_vec_sub16_i64,
3246 .fniv = tcg_gen_sub_vec,
3247 .fno = gen_helper_sve_subri_h,
53229a77 3248 .opt_opc = vecop_list,
6e6a157d
RH
3249 .vece = MO_16,
3250 .scalar_first = true },
3251 { .fni4 = tcg_gen_sub_i32,
3252 .fniv = tcg_gen_sub_vec,
3253 .fno = gen_helper_sve_subri_s,
53229a77 3254 .opt_opc = vecop_list,
6e6a157d
RH
3255 .vece = MO_32,
3256 .scalar_first = true },
3257 { .fni8 = tcg_gen_sub_i64,
3258 .fniv = tcg_gen_sub_vec,
3259 .fno = gen_helper_sve_subri_d,
53229a77 3260 .opt_opc = vecop_list,
6e6a157d
RH
3261 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3262 .vece = MO_64,
3263 .scalar_first = true }
3264 };
3265
3a7be554 3266 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3267 return false;
3268 }
3269 if (sve_access_check(s)) {
3270 unsigned vsz = vec_full_reg_size(s);
3271 TCGv_i64 c = tcg_const_i64(a->imm);
3272 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3273 vec_full_reg_offset(s, a->rn),
3274 vsz, vsz, c, &op[a->esz]);
3275 tcg_temp_free_i64(c);
3276 }
3277 return true;
3278}
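/*
 * The .scalar_first flag above is what makes this a reversed subtract:
 * the expansion computes imm - Zn rather than Zn - imm, matching SUBR
 * while still reusing the ordinary sub expanders.
 */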
3279
3a7be554 3280static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3281{
3282 if (sve_access_check(s)) {
3283 unsigned vsz = vec_full_reg_size(s);
3284 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3285 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3286 }
3287 return true;
3288}
3289
3a7be554 3290static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3291{
3a7be554 3292 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3293 return false;
3294 }
3295 if (sve_access_check(s)) {
3296 TCGv_i64 val = tcg_const_i64(a->imm);
3297 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3298 tcg_temp_free_i64(val);
3299 }
3300 return true;
3301}
3302
3a7be554 3303static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3304{
3a7be554 3305 return do_zzi_sat(s, a, false, false);
6e6a157d
RH
3306}
3307
3a7be554 3308static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3309{
3a7be554 3310 return do_zzi_sat(s, a, true, false);
6e6a157d
RH
3311}
3312
3a7be554 3313static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3314{
3a7be554 3315 return do_zzi_sat(s, a, false, true);
6e6a157d
RH
3316}
3317
3a7be554 3318static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3319{
3a7be554 3320 return do_zzi_sat(s, a, true, true);
6e6a157d
RH
3321}
3322
3323static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3324{
3325 if (sve_access_check(s)) {
3326 unsigned vsz = vec_full_reg_size(s);
3327 TCGv_i64 c = tcg_const_i64(a->imm);
3328
3329 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3330 vec_full_reg_offset(s, a->rn),
3331 c, vsz, vsz, 0, fn);
3332 tcg_temp_free_i64(c);
3333 }
3334 return true;
3335}
3336
3337#define DO_ZZI(NAME, name) \
3a7be554 3338static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
6e6a157d
RH
3339{ \
3340 static gen_helper_gvec_2i * const fns[4] = { \
3341 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3342 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3343 }; \
3344 return do_zzi_ool(s, a, fns[a->esz]); \
3345}
3346
3347DO_ZZI(SMAX, smax)
3348DO_ZZI(UMAX, umax)
3349DO_ZZI(SMIN, smin)
3350DO_ZZI(UMIN, umin)
3351
3352#undef DO_ZZI
3353
3a7be554 3354static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
d730ecaa
RH
3355{
3356 static gen_helper_gvec_3 * const fns[2][2] = {
3357 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3358 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3359 };
3360
3361 if (sve_access_check(s)) {
e645d1a1 3362 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0);
d730ecaa
RH
3363 }
3364 return true;
3365}
3366
3a7be554 3367static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
16fcfdc7
RH
3368{
3369 static gen_helper_gvec_3 * const fns[2][2] = {
3370 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3371 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3372 };
3373
3374 if (sve_access_check(s)) {
e645d1a1 3375 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index);
16fcfdc7
RH
3376 }
3377 return true;
3378}
3379
3380
ca40a6e6
RH
3381/*
3382 *** SVE Floating Point Multiply-Add Indexed Group
3383 */
3384
3a7be554 3385static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
ca40a6e6
RH
3386{
3387 static gen_helper_gvec_4_ptr * const fns[3] = {
3388 gen_helper_gvec_fmla_idx_h,
3389 gen_helper_gvec_fmla_idx_s,
3390 gen_helper_gvec_fmla_idx_d,
3391 };
3392
3393 if (sve_access_check(s)) {
3394 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3395 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3396 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3397 vec_full_reg_offset(s, a->rn),
3398 vec_full_reg_offset(s, a->rm),
3399 vec_full_reg_offset(s, a->ra),
3400 status, vsz, vsz, (a->index << 1) | a->sub,
3401 fns[a->esz - 1]);
3402 tcg_temp_free_ptr(status);
3403 }
3404 return true;
3405}
3406
3407/*
3408 *** SVE Floating Point Multiply Indexed Group
3409 */
3410
3a7be554 3411static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
ca40a6e6
RH
3412{
3413 static gen_helper_gvec_3_ptr * const fns[3] = {
3414 gen_helper_gvec_fmul_idx_h,
3415 gen_helper_gvec_fmul_idx_s,
3416 gen_helper_gvec_fmul_idx_d,
3417 };
3418
3419 if (sve_access_check(s)) {
3420 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3421 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3422 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3423 vec_full_reg_offset(s, a->rn),
3424 vec_full_reg_offset(s, a->rm),
3425 status, vsz, vsz, a->index, fns[a->esz - 1]);
3426 tcg_temp_free_ptr(status);
3427 }
3428 return true;
3429}
3430
23fbe79f
RH
3431/*
3432 *** SVE Floating Point Fast Reduction Group
3433 */
3434
3435typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3436 TCGv_ptr, TCGv_i32);
3437
3438static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3439 gen_helper_fp_reduce *fn)
3440{
3441 unsigned vsz = vec_full_reg_size(s);
3442 unsigned p2vsz = pow2ceil(vsz);
3443 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3444 TCGv_ptr t_zn, t_pg, status;
3445 TCGv_i64 temp;
3446
3447 temp = tcg_temp_new_i64();
3448 t_zn = tcg_temp_new_ptr();
3449 t_pg = tcg_temp_new_ptr();
3450
3451 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3452 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3453 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
23fbe79f
RH
3454
3455 fn(temp, t_zn, t_pg, status, t_desc);
3456 tcg_temp_free_ptr(t_zn);
3457 tcg_temp_free_ptr(t_pg);
3458 tcg_temp_free_ptr(status);
3459 tcg_temp_free_i32(t_desc);
3460
3461 write_fp_dreg(s, a->rd, temp);
3462 tcg_temp_free_i64(temp);
3463}
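/*
 * p2vsz is passed as the descriptor's maxsz so that the helper can
 * perform the reduction as a power-of-two tree, with slots beyond vsz
 * or inactive under the predicate filled with the operation's
 * identity value.
 */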
3464
3465#define DO_VPZ(NAME, name) \
3a7be554 3466static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
23fbe79f
RH
3467{ \
3468 static gen_helper_fp_reduce * const fns[3] = { \
3469 gen_helper_sve_##name##_h, \
3470 gen_helper_sve_##name##_s, \
3471 gen_helper_sve_##name##_d, \
3472 }; \
3473 if (a->esz == 0) { \
3474 return false; \
3475 } \
3476 if (sve_access_check(s)) { \
3477 do_reduce(s, a, fns[a->esz - 1]); \
3478 } \
3479 return true; \
3480}
3481
3482DO_VPZ(FADDV, faddv)
3483DO_VPZ(FMINNMV, fminnmv)
3484DO_VPZ(FMAXNMV, fmaxnmv)
3485DO_VPZ(FMINV, fminv)
3486DO_VPZ(FMAXV, fmaxv)
3487
3887c038
RH
3488/*
3489 *** SVE Floating Point Unary Operations - Unpredicated Group
3490 */
3491
3492static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3493{
3494 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3495 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
3496
3497 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3498 vec_full_reg_offset(s, a->rn),
3499 status, vsz, vsz, 0, fn);
3500 tcg_temp_free_ptr(status);
3501}
3502
3a7be554 3503static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3504{
3505 static gen_helper_gvec_2_ptr * const fns[3] = {
3506 gen_helper_gvec_frecpe_h,
3507 gen_helper_gvec_frecpe_s,
3508 gen_helper_gvec_frecpe_d,
3509 };
3510 if (a->esz == 0) {
3511 return false;
3512 }
3513 if (sve_access_check(s)) {
3514 do_zz_fp(s, a, fns[a->esz - 1]);
3515 }
3516 return true;
3517}
3518
3a7be554 3519static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3520{
3521 static gen_helper_gvec_2_ptr * const fns[3] = {
3522 gen_helper_gvec_frsqrte_h,
3523 gen_helper_gvec_frsqrte_s,
3524 gen_helper_gvec_frsqrte_d,
3525 };
3526 if (a->esz == 0) {
3527 return false;
3528 }
3529 if (sve_access_check(s)) {
3530 do_zz_fp(s, a, fns[a->esz - 1]);
3531 }
3532 return true;
3533}
3534
4d2e2a03
RH
3535/*
3536 *** SVE Floating Point Compare with Zero Group
3537 */
3538
3539static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3540 gen_helper_gvec_3_ptr *fn)
3541{
3542 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3543 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
3544
3545 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3546 vec_full_reg_offset(s, a->rn),
3547 pred_full_reg_offset(s, a->pg),
3548 status, vsz, vsz, 0, fn);
3549 tcg_temp_free_ptr(status);
3550}
3551
3552#define DO_PPZ(NAME, name) \
3a7be554 3553static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
4d2e2a03
RH
3554{ \
3555 static gen_helper_gvec_3_ptr * const fns[3] = { \
3556 gen_helper_sve_##name##_h, \
3557 gen_helper_sve_##name##_s, \
3558 gen_helper_sve_##name##_d, \
3559 }; \
3560 if (a->esz == 0) { \
3561 return false; \
3562 } \
3563 if (sve_access_check(s)) { \
3564 do_ppz_fp(s, a, fns[a->esz - 1]); \
3565 } \
3566 return true; \
3567}
3568
3569DO_PPZ(FCMGE_ppz0, fcmge0)
3570DO_PPZ(FCMGT_ppz0, fcmgt0)
3571DO_PPZ(FCMLE_ppz0, fcmle0)
3572DO_PPZ(FCMLT_ppz0, fcmlt0)
3573DO_PPZ(FCMEQ_ppz0, fcmeq0)
3574DO_PPZ(FCMNE_ppz0, fcmne0)
3575
3576#undef DO_PPZ
3577
67fcd9ad
RH
3578/*
3579 *** SVE floating-point trig multiply-add coefficient
3580 */
3581
3a7be554 3582static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
3583{
3584 static gen_helper_gvec_3_ptr * const fns[3] = {
3585 gen_helper_sve_ftmad_h,
3586 gen_helper_sve_ftmad_s,
3587 gen_helper_sve_ftmad_d,
3588 };
3589
3590 if (a->esz == 0) {
3591 return false;
3592 }
3593 if (sve_access_check(s)) {
3594 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3595 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
3596 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3597 vec_full_reg_offset(s, a->rn),
3598 vec_full_reg_offset(s, a->rm),
3599 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3600 tcg_temp_free_ptr(status);
3601 }
3602 return true;
3603}
3604
7f9ddf64
RH
3605/*
3606 *** SVE Floating Point Accumulating Reduction Group
3607 */
3608
3a7be554 3609static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
3610{
3611 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3612 TCGv_ptr, TCGv_ptr, TCGv_i32);
3613 static fadda_fn * const fns[3] = {
3614 gen_helper_sve_fadda_h,
3615 gen_helper_sve_fadda_s,
3616 gen_helper_sve_fadda_d,
3617 };
3618 unsigned vsz = vec_full_reg_size(s);
3619 TCGv_ptr t_rm, t_pg, t_fpst;
3620 TCGv_i64 t_val;
3621 TCGv_i32 t_desc;
3622
3623 if (a->esz == 0) {
3624 return false;
3625 }
3626 if (!sve_access_check(s)) {
3627 return true;
3628 }
3629
3630 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3631 t_rm = tcg_temp_new_ptr();
3632 t_pg = tcg_temp_new_ptr();
3633 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3634 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3635 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7f9ddf64
RH
3636 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3637
3638 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3639
3640 tcg_temp_free_i32(t_desc);
3641 tcg_temp_free_ptr(t_fpst);
3642 tcg_temp_free_ptr(t_pg);
3643 tcg_temp_free_ptr(t_rm);
3644
3645 write_fp_dreg(s, a->rd, t_val);
3646 tcg_temp_free_i64(t_val);
3647 return true;
3648}
3649
29b80469
RH
3650/*
3651 *** SVE Floating Point Arithmetic - Unpredicated Group
3652 */
3653
3654static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3655 gen_helper_gvec_3_ptr *fn)
3656{
3657 if (fn == NULL) {
3658 return false;
3659 }
3660 if (sve_access_check(s)) {
3661 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3662 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
3663 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3664 vec_full_reg_offset(s, a->rn),
3665 vec_full_reg_offset(s, a->rm),
3666 status, vsz, vsz, 0, fn);
3667 tcg_temp_free_ptr(status);
3668 }
3669 return true;
3670}
3671
3672
3673#define DO_FP3(NAME, name) \
3a7be554 3674static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
3675{ \
3676 static gen_helper_gvec_3_ptr * const fns[4] = { \
3677 NULL, gen_helper_gvec_##name##_h, \
3678 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3679 }; \
3680 return do_zzz_fp(s, a, fns[a->esz]); \
3681}
3682
3683DO_FP3(FADD_zzz, fadd)
3684DO_FP3(FSUB_zzz, fsub)
3685DO_FP3(FMUL_zzz, fmul)
3686DO_FP3(FTSMUL, ftsmul)
3687DO_FP3(FRECPS, recps)
3688DO_FP3(FRSQRTS, rsqrts)
3689
3690#undef DO_FP3
3691
ec3b87c2
RH
3692/*
3693 *** SVE Floating Point Arithmetic - Predicated Group
3694 */
3695
3696static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3697 gen_helper_gvec_4_ptr *fn)
3698{
3699 if (fn == NULL) {
3700 return false;
3701 }
3702 if (sve_access_check(s)) {
3703 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3704 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
3705 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3706 vec_full_reg_offset(s, a->rn),
3707 vec_full_reg_offset(s, a->rm),
3708 pred_full_reg_offset(s, a->pg),
3709 status, vsz, vsz, 0, fn);
3710 tcg_temp_free_ptr(status);
3711 }
3712 return true;
3713}
3714
3715#define DO_FP3(NAME, name) \
3a7be554 3716static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
3717{ \
3718 static gen_helper_gvec_4_ptr * const fns[4] = { \
3719 NULL, gen_helper_sve_##name##_h, \
3720 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3721 }; \
3722 return do_zpzz_fp(s, a, fns[a->esz]); \
3723}
3724
3725DO_FP3(FADD_zpzz, fadd)
3726DO_FP3(FSUB_zpzz, fsub)
3727DO_FP3(FMUL_zpzz, fmul)
3728DO_FP3(FMIN_zpzz, fmin)
3729DO_FP3(FMAX_zpzz, fmax)
3730DO_FP3(FMINNM_zpzz, fminnum)
3731DO_FP3(FMAXNM_zpzz, fmaxnum)
3732DO_FP3(FABD, fabd)
3733DO_FP3(FSCALE, fscalbn)
3734DO_FP3(FDIV, fdiv)
3735DO_FP3(FMULX, fmulx)
3736
3737#undef DO_FP3
8092c6a3 3738
cc48affe
RH
3739typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3740 TCGv_i64, TCGv_ptr, TCGv_i32);
3741
3742static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3743 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3744{
3745 unsigned vsz = vec_full_reg_size(s);
3746 TCGv_ptr t_zd, t_zn, t_pg, status;
3747 TCGv_i32 desc;
3748
3749 t_zd = tcg_temp_new_ptr();
3750 t_zn = tcg_temp_new_ptr();
3751 t_pg = tcg_temp_new_ptr();
3752 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3753 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3754 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3755
cdfb22bb 3756 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
cc48affe
RH
3757 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3758 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3759
3760 tcg_temp_free_i32(desc);
3761 tcg_temp_free_ptr(status);
3762 tcg_temp_free_ptr(t_pg);
3763 tcg_temp_free_ptr(t_zn);
3764 tcg_temp_free_ptr(t_zd);
3765}
3766
3767static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3768 gen_helper_sve_fp2scalar *fn)
3769{
3770 TCGv_i64 temp = tcg_const_i64(imm);
3771 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3772 tcg_temp_free_i64(temp);
3773}
3774
3775#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 3776static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
3777{ \
3778 static gen_helper_sve_fp2scalar * const fns[3] = { \
3779 gen_helper_sve_##name##_h, \
3780 gen_helper_sve_##name##_s, \
3781 gen_helper_sve_##name##_d \
3782 }; \
3783 static uint64_t const val[3][2] = { \
3784 { float16_##const0, float16_##const1 }, \
3785 { float32_##const0, float32_##const1 }, \
3786 { float64_##const0, float64_##const1 }, \
3787 }; \
3788 if (a->esz == 0) { \
3789 return false; \
3790 } \
3791 if (sve_access_check(s)) { \
3792 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3793 } \
3794 return true; \
3795}
3796
cc48affe
RH
3797DO_FP_IMM(FADD, fadds, half, one)
3798DO_FP_IMM(FSUB, fsubs, half, one)
3799DO_FP_IMM(FMUL, fmuls, half, two)
3800DO_FP_IMM(FSUBR, fsubrs, half, one)
3801DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3802DO_FP_IMM(FMINNM, fminnms, zero, one)
3803DO_FP_IMM(FMAX, fmaxs, zero, one)
3804DO_FP_IMM(FMIN, fmins, zero, one)
3805
3806#undef DO_FP_IMM
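/*
 * Here a->imm is a single bit selecting between the two implicit
 * constants listed per insn above: e.g. FADD chooses between 0.5 and
 * 1.0, FMUL between 0.5 and 2.0, and FMAXNM/FMINNM between 0.0 and 1.0.
 */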
3807
abfdefd5
RH
3808static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3809 gen_helper_gvec_4_ptr *fn)
3810{
3811 if (fn == NULL) {
3812 return false;
3813 }
3814 if (sve_access_check(s)) {
3815 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3816 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
3817 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3818 vec_full_reg_offset(s, a->rn),
3819 vec_full_reg_offset(s, a->rm),
3820 pred_full_reg_offset(s, a->pg),
3821 status, vsz, vsz, 0, fn);
3822 tcg_temp_free_ptr(status);
3823 }
3824 return true;
3825}
3826
3827#define DO_FPCMP(NAME, name) \
3a7be554 3828static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
3829{ \
3830 static gen_helper_gvec_4_ptr * const fns[4] = { \
3831 NULL, gen_helper_sve_##name##_h, \
3832 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3833 }; \
3834 return do_fp_cmp(s, a, fns[a->esz]); \
3835}
3836
3837DO_FPCMP(FCMGE, fcmge)
3838DO_FPCMP(FCMGT, fcmgt)
3839DO_FPCMP(FCMEQ, fcmeq)
3840DO_FPCMP(FCMNE, fcmne)
3841DO_FPCMP(FCMUO, fcmuo)
3842DO_FPCMP(FACGE, facge)
3843DO_FPCMP(FACGT, facgt)
3844
3845#undef DO_FPCMP
3846
3a7be554 3847static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
3848{
3849 static gen_helper_gvec_4_ptr * const fns[3] = {
3850 gen_helper_sve_fcadd_h,
3851 gen_helper_sve_fcadd_s,
3852 gen_helper_sve_fcadd_d
3853 };
3854
3855 if (a->esz == 0) {
3856 return false;
3857 }
3858 if (sve_access_check(s)) {
3859 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3860 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
3861 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3862 vec_full_reg_offset(s, a->rn),
3863 vec_full_reg_offset(s, a->rm),
3864 pred_full_reg_offset(s, a->pg),
3865 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3866 tcg_temp_free_ptr(status);
3867 }
3868 return true;
3869}
3870
08975da9
RH
3871static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
3872 gen_helper_gvec_5_ptr *fn)
6ceabaad 3873{
08975da9 3874 if (a->esz == 0) {
6ceabaad
RH
3875 return false;
3876 }
08975da9
RH
3877 if (sve_access_check(s)) {
3878 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3879 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
3880 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3881 vec_full_reg_offset(s, a->rn),
3882 vec_full_reg_offset(s, a->rm),
3883 vec_full_reg_offset(s, a->ra),
3884 pred_full_reg_offset(s, a->pg),
3885 status, vsz, vsz, 0, fn);
3886 tcg_temp_free_ptr(status);
6ceabaad 3887 }
6ceabaad
RH
3888 return true;
3889}
3890
3891#define DO_FMLA(NAME, name) \
3a7be554 3892static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 3893{ \
08975da9 3894 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
3895 NULL, gen_helper_sve_##name##_h, \
3896 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3897 }; \
3898 return do_fmla(s, a, fns[a->esz]); \
3899}
3900
3901DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3902DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3903DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3904DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3905
3906#undef DO_FMLA
3907
3a7be554 3908static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 3909{
08975da9
RH
3910 static gen_helper_gvec_5_ptr * const fns[4] = {
3911 NULL,
05f48bab
RH
3912 gen_helper_sve_fcmla_zpzzz_h,
3913 gen_helper_sve_fcmla_zpzzz_s,
3914 gen_helper_sve_fcmla_zpzzz_d,
3915 };
3916
3917 if (a->esz == 0) {
3918 return false;
3919 }
3920 if (sve_access_check(s)) {
3921 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3922 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
3923 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3924 vec_full_reg_offset(s, a->rn),
3925 vec_full_reg_offset(s, a->rm),
3926 vec_full_reg_offset(s, a->ra),
3927 pred_full_reg_offset(s, a->pg),
3928 status, vsz, vsz, a->rot, fns[a->esz]);
3929 tcg_temp_free_ptr(status);
05f48bab
RH
3930 }
3931 return true;
3932}
3933
3a7be554 3934static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405
RH
3935{
3936 static gen_helper_gvec_3_ptr * const fns[2] = {
3937 gen_helper_gvec_fcmlah_idx,
3938 gen_helper_gvec_fcmlas_idx,
3939 };
3940
3941 tcg_debug_assert(a->esz == 1 || a->esz == 2);
3942 tcg_debug_assert(a->rd == a->ra);
3943 if (sve_access_check(s)) {
3944 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3945 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
18fc2405
RH
3946 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3947 vec_full_reg_offset(s, a->rn),
3948 vec_full_reg_offset(s, a->rm),
3949 status, vsz, vsz,
3950 a->index * 4 + a->rot,
3951 fns[a->esz - 1]);
3952 tcg_temp_free_ptr(status);
3953 }
3954 return true;
3955}
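/*
 * For the indexed FCMLA above, the element index and the 2-bit rotation
 * share the single simd_data immediate: data = index * 4 + rot, i.e. the
 * rotation sits in the low two bits with the index above it.  Indexed
 * helpers exist only for the H and S element sizes, hence the esz
 * assertions and the fns[a->esz - 1] lookup.
 */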
3956
8092c6a3
RH
3957/*
3958 *** SVE Floating Point Unary Operations Predicated Group
3959 */
3960
3961static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3962 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3963{
3964 if (sve_access_check(s)) {
3965 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3966 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
3967 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3968 vec_full_reg_offset(s, rn),
3969 pred_full_reg_offset(s, pg),
3970 status, vsz, vsz, 0, fn);
3971 tcg_temp_free_ptr(status);
3972 }
3973 return true;
3974}
3975
3a7be554 3976static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 3977{
e4ab5124 3978 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
3979}
3980
3a7be554 3981static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
3982{
3983 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
3984}
3985
3a7be554 3986static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 3987{
e4ab5124 3988 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
3989}
3990
3a7be554 3991static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
3992{
3993 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
3994}
3995
3a7be554 3996static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
3997{
3998 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
3999}
4000
3a7be554 4001static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4002{
4003 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4004}
4005
3a7be554 4006static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4007{
4008 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4009}
4010
3a7be554 4011static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4012{
4013 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4014}
4015
3a7be554 4016static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4017{
4018 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4019}
4020
3a7be554 4021static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4022{
4023 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4024}
4025
3a7be554 4026static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4027{
4028 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4029}
4030
3a7be554 4031static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4032{
4033 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4034}
4035
3a7be554 4036static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4037{
4038 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4039}
4040
3a7be554 4041static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4042{
4043 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4044}
4045
3a7be554 4046static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4047{
4048 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4049}
4050
3a7be554 4051static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4052{
4053 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4054}
4055
3a7be554 4056static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4057{
4058 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4059}
4060
3a7be554 4061static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4062{
4063 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4064}
4065
3a7be554 4066static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4067{
4068 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4069}
4070
3a7be554 4071static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4072{
4073 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4074}
4075
cda3c753
RH
4076static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4077 gen_helper_sve_frint_h,
4078 gen_helper_sve_frint_s,
4079 gen_helper_sve_frint_d
4080};
4081
3a7be554 4082static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4083{
4084 if (a->esz == 0) {
4085 return false;
4086 }
4087 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4088 frint_fns[a->esz - 1]);
4089}
4090
3a7be554 4091static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4092{
4093 static gen_helper_gvec_3_ptr * const fns[3] = {
4094 gen_helper_sve_frintx_h,
4095 gen_helper_sve_frintx_s,
4096 gen_helper_sve_frintx_d
4097 };
4098 if (a->esz == 0) {
4099 return false;
4100 }
4101 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4102}
4103
4104static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4105{
4106 if (a->esz == 0) {
4107 return false;
4108 }
4109 if (sve_access_check(s)) {
4110 unsigned vsz = vec_full_reg_size(s);
4111 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4112 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4113
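        /*
         * set_rmode installs the requested rounding mode and hands the
         * previous mode back in its first argument (hence the same temp
         * is passed as both source and destination), so the second call
         * below restores the original FPCR rounding mode afterwards.
         */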
4114 gen_helper_set_rmode(tmode, tmode, status);
4115
4116 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4117 vec_full_reg_offset(s, a->rn),
4118 pred_full_reg_offset(s, a->pg),
4119 status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4120
4121 gen_helper_set_rmode(tmode, tmode, status);
4122 tcg_temp_free_i32(tmode);
4123 tcg_temp_free_ptr(status);
4124 }
4125 return true;
4126}
4127
3a7be554 4128static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4129{
4130 return do_frint_mode(s, a, float_round_nearest_even);
4131}
4132
3a7be554 4133static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4134{
4135 return do_frint_mode(s, a, float_round_up);
4136}
4137
3a7be554 4138static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4139{
4140 return do_frint_mode(s, a, float_round_down);
4141}
4142
3a7be554 4143static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4144{
4145 return do_frint_mode(s, a, float_round_to_zero);
4146}
4147
3a7be554 4148static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4149{
4150 return do_frint_mode(s, a, float_round_ties_away);
4151}
4152
3a7be554 4153static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4154{
4155 static gen_helper_gvec_3_ptr * const fns[3] = {
4156 gen_helper_sve_frecpx_h,
4157 gen_helper_sve_frecpx_s,
4158 gen_helper_sve_frecpx_d
4159 };
4160 if (a->esz == 0) {
4161 return false;
4162 }
4163 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4164}
4165
3a7be554 4166static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4167{
4168 static gen_helper_gvec_3_ptr * const fns[3] = {
4169 gen_helper_sve_fsqrt_h,
4170 gen_helper_sve_fsqrt_s,
4171 gen_helper_sve_fsqrt_d
4172 };
4173 if (a->esz == 0) {
4174 return false;
4175 }
4176 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4177}
4178
3a7be554 4179static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4180{
4181 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4182}
4183
3a7be554 4184static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4185{
4186 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4187}
4188
3a7be554 4189static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4190{
4191 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4192}
4193
3a7be554 4194static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4195{
4196 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4197}
4198
3a7be554 4199static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4200{
4201 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4202}
4203
3a7be554 4204static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4205{
4206 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4207}
4208
3a7be554 4209static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4210{
4211 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4212}
4213
3a7be554 4214static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4215{
4216 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4217}
4218
3a7be554 4219static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4220{
4221 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4222}
4223
3a7be554 4224static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4225{
4226 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4227}
4228
3a7be554 4229static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4230{
4231 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4232}
4233
3a7be554 4234static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4235{
4236 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4237}
4238
3a7be554 4239static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4240{
4241 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4242}
4243
3a7be554 4244static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4245{
4246 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4247}
4248
d1822297
RH
4249/*
4250 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4251 */
4252
4253/* Subroutine loading a vector register at VOFS of LEN bytes.
4254 * The load should begin at the address Rn + IMM.
4255 */
4256
19f2acc9 4257static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4258{
19f2acc9
RH
4259 int len_align = QEMU_ALIGN_DOWN(len, 8);
4260 int len_remain = len % 8;
4261 int nparts = len / 8 + ctpop8(len_remain);
d1822297 4262 int midx = get_mem_index(s);
b2aa8879 4263 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4264
b2aa8879
RH
4265 dirty_addr = tcg_temp_new_i64();
4266 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4267 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4268 tcg_temp_free_i64(dirty_addr);
d1822297 4269
b2aa8879
RH
4270 /*
4271 * Note that unpredicated load/store of vector/predicate registers
d1822297 4272 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4273 * operations on larger quantities.
d1822297
RH
4274 * Attempt to keep code expansion to a minimum by limiting the
4275 * amount of unrolling done.
4276 */
4277 if (nparts <= 4) {
4278 int i;
4279
b2aa8879 4280 t0 = tcg_temp_new_i64();
d1822297 4281 for (i = 0; i < len_align; i += 8) {
b2aa8879 4282 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
d1822297 4283 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 4284 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4285 }
b2aa8879 4286 tcg_temp_free_i64(t0);
d1822297
RH
4287 } else {
4288 TCGLabel *loop = gen_new_label();
4289 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4290
b2aa8879
RH
4291 /* Copy the clean address into a local temp, live across the loop. */
4292 t0 = clean_addr;
4b4dc975 4293 clean_addr = new_tmp_a64_local(s);
b2aa8879 4294 tcg_gen_mov_i64(clean_addr, t0);
d1822297 4295
b2aa8879 4296 gen_set_label(loop);
d1822297 4297
b2aa8879
RH
4298 t0 = tcg_temp_new_i64();
4299 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4300 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4301
b2aa8879 4302 tp = tcg_temp_new_ptr();
d1822297
RH
4303 tcg_gen_add_ptr(tp, cpu_env, i);
4304 tcg_gen_addi_ptr(i, i, 8);
4305 tcg_gen_st_i64(t0, tp, vofs);
4306 tcg_temp_free_ptr(tp);
b2aa8879 4307 tcg_temp_free_i64(t0);
d1822297
RH
4308
4309 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4310 tcg_temp_free_ptr(i);
4311 }
4312
b2aa8879
RH
4313 /*
4314 * Predicate register loads can be any multiple of 2.
d1822297
RH
4315 * Note that we still store the entire 64-bit unit into cpu_env.
4316 */
4317 if (len_remain) {
b2aa8879 4318 t0 = tcg_temp_new_i64();
d1822297
RH
4319 switch (len_remain) {
4320 case 2:
4321 case 4:
4322 case 8:
b2aa8879
RH
4323 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4324 MO_LE | ctz32(len_remain));
d1822297
RH
4325 break;
4326
4327 case 6:
4328 t1 = tcg_temp_new_i64();
b2aa8879
RH
4329 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4330 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4331 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
4332 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4333 tcg_temp_free_i64(t1);
4334 break;
4335
4336 default:
4337 g_assert_not_reached();
4338 }
4339 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 4340 tcg_temp_free_i64(t0);
d1822297 4341 }
d1822297
RH
4342}
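/*
 * Sizing examples for do_ldr: a 32-byte Z register gives len_align = 32,
 * len_remain = 0, nparts = 4, so the loads are fully unrolled; a 64-byte
 * register gives nparts = 8 and takes the loop form instead.  A 6-byte
 * predicate (len_align = 0, len_remain = 6, nparts = 2) is handled
 * entirely by the tail: a 4-byte and a 2-byte load recombined with
 * tcg_gen_deposit_i64.
 */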
4343
5047c204 4344/* Similarly for stores. */
19f2acc9 4345static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 4346{
19f2acc9
RH
4347 int len_align = QEMU_ALIGN_DOWN(len, 8);
4348 int len_remain = len % 8;
4349 int nparts = len / 8 + ctpop8(len_remain);
5047c204 4350 int midx = get_mem_index(s);
bba87d0a 4351 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 4352
bba87d0a
RH
4353 dirty_addr = tcg_temp_new_i64();
4354 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4355 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4356 tcg_temp_free_i64(dirty_addr);
5047c204
RH
4357
4358 /* Note that unpredicated load/store of vector/predicate registers
4359 * are defined as a stream of bytes, which equates to little-endian
4360 * operations on larger quantities. There is no nice way to force
4361 * a little-endian store for aarch64_be-linux-user out of line.
4362 *
4363 * Attempt to keep code expansion to a minimum by limiting the
4364 * amount of unrolling done.
4365 */
4366 if (nparts <= 4) {
4367 int i;
4368
bba87d0a 4369 t0 = tcg_temp_new_i64();
5047c204
RH
4370 for (i = 0; i < len_align; i += 8) {
4371 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
bba87d0a 4372 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
d8227b09 4373 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 4374 }
bba87d0a 4375 tcg_temp_free_i64(t0);
5047c204
RH
4376 } else {
4377 TCGLabel *loop = gen_new_label();
bba87d0a 4378 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 4379
bba87d0a
RH
4380 /* Copy the clean address into a local temp, live across the loop. */
4381 t0 = clean_addr;
4b4dc975 4382 clean_addr = new_tmp_a64_local(s);
bba87d0a 4383 tcg_gen_mov_i64(clean_addr, t0);
5047c204 4384
bba87d0a 4385 gen_set_label(loop);
5047c204 4386
bba87d0a
RH
4387 t0 = tcg_temp_new_i64();
4388 tp = tcg_temp_new_ptr();
4389 tcg_gen_add_ptr(tp, cpu_env, i);
4390 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 4391 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
4392 tcg_temp_free_ptr(tp);
4393
4394 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4395 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4396 tcg_temp_free_i64(t0);
5047c204
RH
4397
4398 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4399 tcg_temp_free_ptr(i);
4400 }
4401
4402 /* Predicate register stores can be any multiple of 2. */
4403 if (len_remain) {
bba87d0a 4404 t0 = tcg_temp_new_i64();
5047c204 4405 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
4406
4407 switch (len_remain) {
4408 case 2:
4409 case 4:
4410 case 8:
bba87d0a
RH
4411 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4412 MO_LE | ctz32(len_remain));
5047c204
RH
4413 break;
4414
4415 case 6:
bba87d0a
RH
4416 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4417 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 4418 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 4419 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
4420 break;
4421
4422 default:
4423 g_assert_not_reached();
4424 }
bba87d0a 4425 tcg_temp_free_i64(t0);
5047c204 4426 }
5047c204
RH
4427}
4428
3a7be554 4429static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4430{
4431 if (sve_access_check(s)) {
4432 int size = vec_full_reg_size(s);
4433 int off = vec_full_reg_offset(s, a->rd);
4434 do_ldr(s, off, size, a->rn, a->imm * size);
4435 }
4436 return true;
4437}
4438
3a7be554 4439static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4440{
4441 if (sve_access_check(s)) {
4442 int size = pred_full_reg_size(s);
4443 int off = pred_full_reg_offset(s, a->rd);
4444 do_ldr(s, off, size, a->rn, a->imm * size);
4445 }
4446 return true;
4447}
c4e7c493 4448
3a7be554 4449static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4450{
4451 if (sve_access_check(s)) {
4452 int size = vec_full_reg_size(s);
4453 int off = vec_full_reg_offset(s, a->rd);
4454 do_str(s, off, size, a->rn, a->imm * size);
4455 }
4456 return true;
4457}
4458
3a7be554 4459static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4460{
4461 if (sve_access_check(s)) {
4462 int size = pred_full_reg_size(s);
4463 int off = pred_full_reg_offset(s, a->rd);
4464 do_str(s, off, size, a->rn, a->imm * size);
4465 }
4466 return true;
4467}
4468
c4e7c493
RH
4469/*
4470 *** SVE Memory - Contiguous Load Group
4471 */
4472
4473/* The memory mode of the dtype. */
14776ab5 4474static const MemOp dtype_mop[16] = {
c4e7c493
RH
4475 MO_UB, MO_UB, MO_UB, MO_UB,
4476 MO_SL, MO_UW, MO_UW, MO_UW,
4477 MO_SW, MO_SW, MO_UL, MO_UL,
4478 MO_SB, MO_SB, MO_SB, MO_Q
4479};
4480
4481#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4482
4483/* The vector element size of dtype. */
4484static const uint8_t dtype_esz[16] = {
4485 0, 1, 2, 3,
4486 3, 1, 2, 3,
4487 3, 2, 2, 3,
4488 3, 2, 1, 3
4489};
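/*
 * Worked decode of the two tables above: dtype 4 has dtype_mop[4] == MO_SL
 * (so dtype_msz(4) == MO_32) and dtype_esz[4] == 3, i.e. a sign-extending
 * 32-bit load into 64-bit vector elements -- the LD1SW form.  dtype 15 is
 * MO_Q with esz 3, the plain LD1D case, which is also what msz_dtype()
 * returns for msz == MO_64.
 */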
4490
4491static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
4492 int dtype, uint32_t mte_n, bool is_write,
4493 gen_helper_gvec_mem *fn)
c4e7c493
RH
4494{
4495 unsigned vsz = vec_full_reg_size(s);
4496 TCGv_ptr t_pg;
500d0484 4497 TCGv_i32 t_desc;
206adacf 4498 int desc = 0;
c4e7c493 4499
206adacf
RH
4500 /*
4501 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
4502 * registers as pointers, so encode the regno into the data field.
4503 * For consistency, do this even for LD1.
4504 */
9473d0ec 4505 if (s->mte_active[0]) {
206adacf
RH
4506 int msz = dtype_msz(dtype);
4507
4508 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4509 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4510 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4511 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
4512 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
4513 desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz);
4514 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
4515 } else {
4516 addr = clean_data_tbi(s, addr);
206adacf 4517 }
9473d0ec 4518
206adacf 4519 desc = simd_desc(vsz, vsz, zt | desc);
500d0484 4520 t_desc = tcg_const_i32(desc);
c4e7c493
RH
4521 t_pg = tcg_temp_new_ptr();
4522
4523 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
500d0484 4524 fn(cpu_env, t_pg, addr, t_desc);
c4e7c493
RH
4525
4526 tcg_temp_free_ptr(t_pg);
500d0484 4527 tcg_temp_free_i32(t_desc);
c4e7c493
RH
4528}
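/*
 * Layout of the descriptor built above: the destination register number
 * zt always lives in the low simd_data bits; when MTE is active the
 * MTEDESC fields are packed above SVE_MTEDESC_SHIFT in the same word,
 * and when it is not, the address is stripped of tag bits up front via
 * clean_data_tbi() instead.
 */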
4529
4530static void do_ld_zpa(DisasContext *s, int zt, int pg,
4531 TCGv_i64 addr, int dtype, int nreg)
4532{
206adacf
RH
4533 static gen_helper_gvec_mem * const fns[2][2][16][4] = {
4534 { /* mte inactive, little-endian */
4535 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
7d0a57a2 4536 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
206adacf
RH
4537 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4538 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4539 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4540
4541 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4542 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4543 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4544 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4545 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4546
4547 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4548 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4549 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4550 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4551 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4552
4553 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4554 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4555 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4556 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4557 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4558
4559 /* mte inactive, big-endian */
4560 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4561 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4562 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4563 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4564 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4565
4566 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4567 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4568 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4569 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4570 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4571
4572 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4573 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4574 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4575 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4576 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4577
4578 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4579 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4580 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4581 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4582 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4583
4584 { /* mte active, little-endian */
4585 { { gen_helper_sve_ld1bb_r_mte,
4586 gen_helper_sve_ld2bb_r_mte,
4587 gen_helper_sve_ld3bb_r_mte,
4588 gen_helper_sve_ld4bb_r_mte },
4589 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4590 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4591 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4592
4593 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4594 { gen_helper_sve_ld1hh_le_r_mte,
4595 gen_helper_sve_ld2hh_le_r_mte,
4596 gen_helper_sve_ld3hh_le_r_mte,
4597 gen_helper_sve_ld4hh_le_r_mte },
4598 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4599 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4600
4601 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4602 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4603 { gen_helper_sve_ld1ss_le_r_mte,
4604 gen_helper_sve_ld2ss_le_r_mte,
4605 gen_helper_sve_ld3ss_le_r_mte,
4606 gen_helper_sve_ld4ss_le_r_mte },
4607 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4608
4609 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4610 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4611 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4612 { gen_helper_sve_ld1dd_le_r_mte,
4613 gen_helper_sve_ld2dd_le_r_mte,
4614 gen_helper_sve_ld3dd_le_r_mte,
4615 gen_helper_sve_ld4dd_le_r_mte } },
4616
4617 /* mte active, big-endian */
4618 { { gen_helper_sve_ld1bb_r_mte,
4619 gen_helper_sve_ld2bb_r_mte,
4620 gen_helper_sve_ld3bb_r_mte,
4621 gen_helper_sve_ld4bb_r_mte },
4622 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4623 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4624 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4625
4626 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4627 { gen_helper_sve_ld1hh_be_r_mte,
4628 gen_helper_sve_ld2hh_be_r_mte,
4629 gen_helper_sve_ld3hh_be_r_mte,
4630 gen_helper_sve_ld4hh_be_r_mte },
4631 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4632 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4633
4634 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4635 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4636 { gen_helper_sve_ld1ss_be_r_mte,
4637 gen_helper_sve_ld2ss_be_r_mte,
4638 gen_helper_sve_ld3ss_be_r_mte,
4639 gen_helper_sve_ld4ss_be_r_mte },
4640 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4641
4642 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4643 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4644 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4645 { gen_helper_sve_ld1dd_be_r_mte,
4646 gen_helper_sve_ld2dd_be_r_mte,
4647 gen_helper_sve_ld3dd_be_r_mte,
4648 gen_helper_sve_ld4dd_be_r_mte } } },
c4e7c493 4649 };
206adacf
RH
4650 gen_helper_gvec_mem *fn
4651 = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 4652
206adacf
RH
4653 /*
4654 * While there are holes in the table, they are not
c4e7c493
RH
4655 * accessible via the instruction encoding.
4656 */
4657 assert(fn != NULL);
206adacf 4658 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
4659}
4660
3a7be554 4661static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
4662{
4663 if (a->rm == 31) {
4664 return false;
4665 }
4666 if (sve_access_check(s)) {
4667 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4668 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4669 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4670 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4671 }
4672 return true;
4673}
4674
3a7be554 4675static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
4676{
4677 if (sve_access_check(s)) {
4678 int vsz = vec_full_reg_size(s);
4679 int elements = vsz >> dtype_esz[a->dtype];
4680 TCGv_i64 addr = new_tmp_a64(s);
4681
4682 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4683 (a->imm * elements * (a->nreg + 1))
4684 << dtype_msz(a->dtype));
4685 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4686 }
4687 return true;
4688}
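/*
 * The immediate of LD_zpri counts whole (nreg + 1)-register transfers.
 * Worked example at a 256-bit vector length: LD2H (dtype 5, so esz 1 and
 * msz 1) with imm == 1 computes elements = 32 >> 1 = 16 and an address
 * offset of (1 * 16 * 2) << 1 = 64 bytes, i.e. one register pair's worth
 * of halfwords past the base.
 */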
e2654d75 4689
3a7be554 4690static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 4691{
aa13f7c3
RH
4692 static gen_helper_gvec_mem * const fns[2][2][16] = {
4693 { /* mte inactive, little-endian */
4694 { gen_helper_sve_ldff1bb_r,
4695 gen_helper_sve_ldff1bhu_r,
4696 gen_helper_sve_ldff1bsu_r,
4697 gen_helper_sve_ldff1bdu_r,
4698
4699 gen_helper_sve_ldff1sds_le_r,
4700 gen_helper_sve_ldff1hh_le_r,
4701 gen_helper_sve_ldff1hsu_le_r,
4702 gen_helper_sve_ldff1hdu_le_r,
4703
4704 gen_helper_sve_ldff1hds_le_r,
4705 gen_helper_sve_ldff1hss_le_r,
4706 gen_helper_sve_ldff1ss_le_r,
4707 gen_helper_sve_ldff1sdu_le_r,
4708
4709 gen_helper_sve_ldff1bds_r,
4710 gen_helper_sve_ldff1bss_r,
4711 gen_helper_sve_ldff1bhs_r,
4712 gen_helper_sve_ldff1dd_le_r },
4713
4714 /* mte inactive, big-endian */
4715 { gen_helper_sve_ldff1bb_r,
4716 gen_helper_sve_ldff1bhu_r,
4717 gen_helper_sve_ldff1bsu_r,
4718 gen_helper_sve_ldff1bdu_r,
4719
4720 gen_helper_sve_ldff1sds_be_r,
4721 gen_helper_sve_ldff1hh_be_r,
4722 gen_helper_sve_ldff1hsu_be_r,
4723 gen_helper_sve_ldff1hdu_be_r,
4724
4725 gen_helper_sve_ldff1hds_be_r,
4726 gen_helper_sve_ldff1hss_be_r,
4727 gen_helper_sve_ldff1ss_be_r,
4728 gen_helper_sve_ldff1sdu_be_r,
4729
4730 gen_helper_sve_ldff1bds_r,
4731 gen_helper_sve_ldff1bss_r,
4732 gen_helper_sve_ldff1bhs_r,
4733 gen_helper_sve_ldff1dd_be_r } },
4734
4735 { /* mte active, little-endian */
4736 { gen_helper_sve_ldff1bb_r_mte,
4737 gen_helper_sve_ldff1bhu_r_mte,
4738 gen_helper_sve_ldff1bsu_r_mte,
4739 gen_helper_sve_ldff1bdu_r_mte,
4740
4741 gen_helper_sve_ldff1sds_le_r_mte,
4742 gen_helper_sve_ldff1hh_le_r_mte,
4743 gen_helper_sve_ldff1hsu_le_r_mte,
4744 gen_helper_sve_ldff1hdu_le_r_mte,
4745
4746 gen_helper_sve_ldff1hds_le_r_mte,
4747 gen_helper_sve_ldff1hss_le_r_mte,
4748 gen_helper_sve_ldff1ss_le_r_mte,
4749 gen_helper_sve_ldff1sdu_le_r_mte,
4750
4751 gen_helper_sve_ldff1bds_r_mte,
4752 gen_helper_sve_ldff1bss_r_mte,
4753 gen_helper_sve_ldff1bhs_r_mte,
4754 gen_helper_sve_ldff1dd_le_r_mte },
4755
4756 /* mte active, big-endian */
4757 { gen_helper_sve_ldff1bb_r_mte,
4758 gen_helper_sve_ldff1bhu_r_mte,
4759 gen_helper_sve_ldff1bsu_r_mte,
4760 gen_helper_sve_ldff1bdu_r_mte,
4761
4762 gen_helper_sve_ldff1sds_be_r_mte,
4763 gen_helper_sve_ldff1hh_be_r_mte,
4764 gen_helper_sve_ldff1hsu_be_r_mte,
4765 gen_helper_sve_ldff1hdu_be_r_mte,
4766
4767 gen_helper_sve_ldff1hds_be_r_mte,
4768 gen_helper_sve_ldff1hss_be_r_mte,
4769 gen_helper_sve_ldff1ss_be_r_mte,
4770 gen_helper_sve_ldff1sdu_be_r_mte,
4771
4772 gen_helper_sve_ldff1bds_r_mte,
4773 gen_helper_sve_ldff1bss_r_mte,
4774 gen_helper_sve_ldff1bhs_r_mte,
4775 gen_helper_sve_ldff1dd_be_r_mte } },
e2654d75
RH
4776 };
4777
4778 if (sve_access_check(s)) {
4779 TCGv_i64 addr = new_tmp_a64(s);
4780 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4781 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
aa13f7c3
RH
4782 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4783 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4784 }
4785 return true;
4786}
4787
3a7be554 4788static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 4789{
aa13f7c3
RH
4790 static gen_helper_gvec_mem * const fns[2][2][16] = {
4791 { /* mte inactive, little-endian */
4792 { gen_helper_sve_ldnf1bb_r,
4793 gen_helper_sve_ldnf1bhu_r,
4794 gen_helper_sve_ldnf1bsu_r,
4795 gen_helper_sve_ldnf1bdu_r,
4796
4797 gen_helper_sve_ldnf1sds_le_r,
4798 gen_helper_sve_ldnf1hh_le_r,
4799 gen_helper_sve_ldnf1hsu_le_r,
4800 gen_helper_sve_ldnf1hdu_le_r,
4801
4802 gen_helper_sve_ldnf1hds_le_r,
4803 gen_helper_sve_ldnf1hss_le_r,
4804 gen_helper_sve_ldnf1ss_le_r,
4805 gen_helper_sve_ldnf1sdu_le_r,
4806
4807 gen_helper_sve_ldnf1bds_r,
4808 gen_helper_sve_ldnf1bss_r,
4809 gen_helper_sve_ldnf1bhs_r,
4810 gen_helper_sve_ldnf1dd_le_r },
4811
4812 /* mte inactive, big-endian */
4813 { gen_helper_sve_ldnf1bb_r,
4814 gen_helper_sve_ldnf1bhu_r,
4815 gen_helper_sve_ldnf1bsu_r,
4816 gen_helper_sve_ldnf1bdu_r,
4817
4818 gen_helper_sve_ldnf1sds_be_r,
4819 gen_helper_sve_ldnf1hh_be_r,
4820 gen_helper_sve_ldnf1hsu_be_r,
4821 gen_helper_sve_ldnf1hdu_be_r,
4822
4823 gen_helper_sve_ldnf1hds_be_r,
4824 gen_helper_sve_ldnf1hss_be_r,
4825 gen_helper_sve_ldnf1ss_be_r,
4826 gen_helper_sve_ldnf1sdu_be_r,
4827
4828 gen_helper_sve_ldnf1bds_r,
4829 gen_helper_sve_ldnf1bss_r,
4830 gen_helper_sve_ldnf1bhs_r,
4831 gen_helper_sve_ldnf1dd_be_r } },
4832
4833 { /* mte active, little-endian */

4834 { gen_helper_sve_ldnf1bb_r_mte,
4835 gen_helper_sve_ldnf1bhu_r_mte,
4836 gen_helper_sve_ldnf1bsu_r_mte,
4837 gen_helper_sve_ldnf1bdu_r_mte,
4838
4839 gen_helper_sve_ldnf1sds_le_r_mte,
4840 gen_helper_sve_ldnf1hh_le_r_mte,
4841 gen_helper_sve_ldnf1hsu_le_r_mte,
4842 gen_helper_sve_ldnf1hdu_le_r_mte,
4843
4844 gen_helper_sve_ldnf1hds_le_r_mte,
4845 gen_helper_sve_ldnf1hss_le_r_mte,
4846 gen_helper_sve_ldnf1ss_le_r_mte,
4847 gen_helper_sve_ldnf1sdu_le_r_mte,
4848
4849 gen_helper_sve_ldnf1bds_r_mte,
4850 gen_helper_sve_ldnf1bss_r_mte,
4851 gen_helper_sve_ldnf1bhs_r_mte,
4852 gen_helper_sve_ldnf1dd_le_r_mte },
4853
4854 /* mte active, big-endian */
4855 { gen_helper_sve_ldnf1bb_r_mte,
4856 gen_helper_sve_ldnf1bhu_r_mte,
4857 gen_helper_sve_ldnf1bsu_r_mte,
4858 gen_helper_sve_ldnf1bdu_r_mte,
4859
4860 gen_helper_sve_ldnf1sds_be_r_mte,
4861 gen_helper_sve_ldnf1hh_be_r_mte,
4862 gen_helper_sve_ldnf1hsu_be_r_mte,
4863 gen_helper_sve_ldnf1hdu_be_r_mte,
4864
4865 gen_helper_sve_ldnf1hds_be_r_mte,
4866 gen_helper_sve_ldnf1hss_be_r_mte,
4867 gen_helper_sve_ldnf1ss_be_r_mte,
4868 gen_helper_sve_ldnf1sdu_be_r_mte,
4869
4870 gen_helper_sve_ldnf1bds_r_mte,
4871 gen_helper_sve_ldnf1bss_r_mte,
4872 gen_helper_sve_ldnf1bhs_r_mte,
4873 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
4874 };
4875
4876 if (sve_access_check(s)) {
4877 int vsz = vec_full_reg_size(s);
4878 int elements = vsz >> dtype_esz[a->dtype];
4879 int off = (a->imm * elements) << dtype_msz(a->dtype);
4880 TCGv_i64 addr = new_tmp_a64(s);
4881
4882 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
4883 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4884 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4885 }
4886 return true;
4887}
1a039c7e 4888
05abe304
RH
4889static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4890{
7d0a57a2
RH
4891 static gen_helper_gvec_mem * const fns[2][4] = {
4892 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
4893 gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4894 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
4895 gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
05abe304
RH
4896 };
4897 unsigned vsz = vec_full_reg_size(s);
4898 TCGv_ptr t_pg;
500d0484
RH
4899 TCGv_i32 t_desc;
4900 int desc, poff;
05abe304
RH
4901
4902 /* Load the first quadword using the normal predicated load helpers. */
ba080b86 4903 desc = simd_desc(16, 16, zt);
500d0484 4904 t_desc = tcg_const_i32(desc);
2a99ab2b
RH
4905
4906 poff = pred_full_reg_offset(s, pg);
4907 if (vsz > 16) {
4908 /*
4909 * Zero-extend the first 16 bits of the predicate into a temporary.
4910 * This avoids triggering an assert making sure we don't have bits
4911 * set within a predicate beyond VQ, but we have lowered VQ to 1
4912 * for this load operation.
4913 */
4914 TCGv_i64 tmp = tcg_temp_new_i64();
4915#ifdef HOST_WORDS_BIGENDIAN
4916 poff += 6;
4917#endif
4918 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4919
4920 poff = offsetof(CPUARMState, vfp.preg_tmp);
4921 tcg_gen_st_i64(tmp, cpu_env, poff);
4922 tcg_temp_free_i64(tmp);
4923 }
4924
05abe304 4925 t_pg = tcg_temp_new_ptr();
2a99ab2b 4926 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 4927
500d0484 4928 fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
05abe304
RH
4929
4930 tcg_temp_free_ptr(t_pg);
500d0484 4931 tcg_temp_free_i32(t_desc);
05abe304
RH
4932
4933 /* Replicate that first quadword. */
4934 if (vsz > 16) {
4935 unsigned dofs = vec_full_reg_offset(s, zt);
4936 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4937 }
4938}
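/*
 * Summary of do_ldrq: the first quadword is loaded with the ordinary
 * predicated LD1 helper but with the descriptor fixed at a 16-byte
 * vector (simd_desc(16, 16, zt)), using only the low 16 predicate bits
 * (bounced through vfp.preg_tmp when the real VL exceeds 128 bits);
 * tcg_gen_gvec_dup_mem with vece = 4 (16-byte elements) then replicates
 * that quadword across the remainder of the destination.
 */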
4939
3a7be554 4940static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
4941{
4942 if (a->rm == 31) {
4943 return false;
4944 }
4945 if (sve_access_check(s)) {
4946 int msz = dtype_msz(a->dtype);
4947 TCGv_i64 addr = new_tmp_a64(s);
4948 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4949 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4950 do_ldrq(s, a->rd, a->pg, addr, msz);
4951 }
4952 return true;
4953}
4954
3a7be554 4955static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
4956{
4957 if (sve_access_check(s)) {
4958 TCGv_i64 addr = new_tmp_a64(s);
4959 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4960 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4961 }
4962 return true;
4963}
4964
68459864 4965/* Load and broadcast element. */
3a7be554 4966static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 4967{
68459864
RH
4968 unsigned vsz = vec_full_reg_size(s);
4969 unsigned psz = pred_full_reg_size(s);
4970 unsigned esz = dtype_esz[a->dtype];
d0e372b0 4971 unsigned msz = dtype_msz(a->dtype);
c0ed9166 4972 TCGLabel *over;
4ac430e1 4973 TCGv_i64 temp, clean_addr;
68459864 4974
c0ed9166
RH
4975 if (!sve_access_check(s)) {
4976 return true;
4977 }
4978
4979 over = gen_new_label();
4980
68459864
RH
4981 /* If the guarding predicate has no bits set, no load occurs. */
4982 if (psz <= 8) {
4983 /* Reduce the pred_esz_masks value simply to reduce the
4984 * size of the code generated here.
4985 */
4986 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4987 temp = tcg_temp_new_i64();
4988 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4989 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4990 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4991 tcg_temp_free_i64(temp);
4992 } else {
4993 TCGv_i32 t32 = tcg_temp_new_i32();
4994 find_last_active(s, t32, esz, a->pg);
4995 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4996 tcg_temp_free_i32(t32);
4997 }
4998
4999 /* Load the data. */
5000 temp = tcg_temp_new_i64();
d0e372b0 5001 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5002 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5003
5004 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
68459864
RH
5005 s->be_data | dtype_mop[a->dtype]);
5006
5007 /* Broadcast to *all* elements. */
5008 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5009 vsz, vsz, temp);
5010 tcg_temp_free_i64(temp);
5011
5012 /* Zero the inactive elements. */
5013 gen_set_label(over);
60245996 5014 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5015}
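/*
 * Note for LD1R above: when the governing predicate has no active
 * elements, the branch to OVER skips the load and broadcast entirely;
 * do_movz_zpz still runs in either case, zeroing the inactive elements,
 * so the all-inactive case leaves Zd entirely zero.
 */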
5016
1a039c7e
RH
5017static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5018 int msz, int esz, int nreg)
5019{
71b9f394
RH
5020 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5021 { { { gen_helper_sve_st1bb_r,
5022 gen_helper_sve_st1bh_r,
5023 gen_helper_sve_st1bs_r,
5024 gen_helper_sve_st1bd_r },
5025 { NULL,
5026 gen_helper_sve_st1hh_le_r,
5027 gen_helper_sve_st1hs_le_r,
5028 gen_helper_sve_st1hd_le_r },
5029 { NULL, NULL,
5030 gen_helper_sve_st1ss_le_r,
5031 gen_helper_sve_st1sd_le_r },
5032 { NULL, NULL, NULL,
5033 gen_helper_sve_st1dd_le_r } },
5034 { { gen_helper_sve_st1bb_r,
5035 gen_helper_sve_st1bh_r,
5036 gen_helper_sve_st1bs_r,
5037 gen_helper_sve_st1bd_r },
5038 { NULL,
5039 gen_helper_sve_st1hh_be_r,
5040 gen_helper_sve_st1hs_be_r,
5041 gen_helper_sve_st1hd_be_r },
5042 { NULL, NULL,
5043 gen_helper_sve_st1ss_be_r,
5044 gen_helper_sve_st1sd_be_r },
5045 { NULL, NULL, NULL,
5046 gen_helper_sve_st1dd_be_r } } },
5047
5048 { { { gen_helper_sve_st1bb_r_mte,
5049 gen_helper_sve_st1bh_r_mte,
5050 gen_helper_sve_st1bs_r_mte,
5051 gen_helper_sve_st1bd_r_mte },
5052 { NULL,
5053 gen_helper_sve_st1hh_le_r_mte,
5054 gen_helper_sve_st1hs_le_r_mte,
5055 gen_helper_sve_st1hd_le_r_mte },
5056 { NULL, NULL,
5057 gen_helper_sve_st1ss_le_r_mte,
5058 gen_helper_sve_st1sd_le_r_mte },
5059 { NULL, NULL, NULL,
5060 gen_helper_sve_st1dd_le_r_mte } },
5061 { { gen_helper_sve_st1bb_r_mte,
5062 gen_helper_sve_st1bh_r_mte,
5063 gen_helper_sve_st1bs_r_mte,
5064 gen_helper_sve_st1bd_r_mte },
5065 { NULL,
5066 gen_helper_sve_st1hh_be_r_mte,
5067 gen_helper_sve_st1hs_be_r_mte,
5068 gen_helper_sve_st1hd_be_r_mte },
5069 { NULL, NULL,
5070 gen_helper_sve_st1ss_be_r_mte,
5071 gen_helper_sve_st1sd_be_r_mte },
5072 { NULL, NULL, NULL,
5073 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5074 };
71b9f394
RH
5075 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5076 { { { gen_helper_sve_st2bb_r,
5077 gen_helper_sve_st2hh_le_r,
5078 gen_helper_sve_st2ss_le_r,
5079 gen_helper_sve_st2dd_le_r },
5080 { gen_helper_sve_st3bb_r,
5081 gen_helper_sve_st3hh_le_r,
5082 gen_helper_sve_st3ss_le_r,
5083 gen_helper_sve_st3dd_le_r },
5084 { gen_helper_sve_st4bb_r,
5085 gen_helper_sve_st4hh_le_r,
5086 gen_helper_sve_st4ss_le_r,
5087 gen_helper_sve_st4dd_le_r } },
5088 { { gen_helper_sve_st2bb_r,
5089 gen_helper_sve_st2hh_be_r,
5090 gen_helper_sve_st2ss_be_r,
5091 gen_helper_sve_st2dd_be_r },
5092 { gen_helper_sve_st3bb_r,
5093 gen_helper_sve_st3hh_be_r,
5094 gen_helper_sve_st3ss_be_r,
5095 gen_helper_sve_st3dd_be_r },
5096 { gen_helper_sve_st4bb_r,
5097 gen_helper_sve_st4hh_be_r,
5098 gen_helper_sve_st4ss_be_r,
5099 gen_helper_sve_st4dd_be_r } } },
5100 { { { gen_helper_sve_st2bb_r_mte,
5101 gen_helper_sve_st2hh_le_r_mte,
5102 gen_helper_sve_st2ss_le_r_mte,
5103 gen_helper_sve_st2dd_le_r_mte },
5104 { gen_helper_sve_st3bb_r_mte,
5105 gen_helper_sve_st3hh_le_r_mte,
5106 gen_helper_sve_st3ss_le_r_mte,
5107 gen_helper_sve_st3dd_le_r_mte },
5108 { gen_helper_sve_st4bb_r_mte,
5109 gen_helper_sve_st4hh_le_r_mte,
5110 gen_helper_sve_st4ss_le_r_mte,
5111 gen_helper_sve_st4dd_le_r_mte } },
5112 { { gen_helper_sve_st2bb_r_mte,
5113 gen_helper_sve_st2hh_be_r_mte,
5114 gen_helper_sve_st2ss_be_r_mte,
5115 gen_helper_sve_st2dd_be_r_mte },
5116 { gen_helper_sve_st3bb_r_mte,
5117 gen_helper_sve_st3hh_be_r_mte,
5118 gen_helper_sve_st3ss_be_r_mte,
5119 gen_helper_sve_st3dd_be_r_mte },
5120 { gen_helper_sve_st4bb_r_mte,
5121 gen_helper_sve_st4hh_be_r_mte,
5122 gen_helper_sve_st4ss_be_r_mte,
5123 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5124 };
5125 gen_helper_gvec_mem *fn;
28d57f2d 5126 int be = s->be_data == MO_BE;
1a039c7e
RH
5127
5128 if (nreg == 0) {
5129 /* ST1 */
71b9f394
RH
5130 fn = fn_single[s->mte_active[0]][be][msz][esz];
5131 nreg = 1;
1a039c7e
RH
5132 } else {
5133 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5134 assert(msz == esz);
71b9f394 5135 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5136 }
5137 assert(fn != NULL);
71b9f394 5138 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5139}
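/*
 * In do_st_zpa, ST1 arrives with nreg == 0 (from the decode) and is
 * remapped to 1 before calling do_mem_zpa, so mte_n is never zero for
 * the single-register case; ST2..ST4 select fn_multiple[nreg - 1][msz]
 * and require msz == esz, which the encoding already guarantees.
 */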
5140
3a7be554 5141static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5142{
5143 if (a->rm == 31 || a->msz > a->esz) {
5144 return false;
5145 }
5146 if (sve_access_check(s)) {
5147 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5148 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5149 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5150 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5151 }
5152 return true;
5153}
5154
3a7be554 5155static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5156{
5157 if (a->msz > a->esz) {
5158 return false;
5159 }
5160 if (sve_access_check(s)) {
5161 int vsz = vec_full_reg_size(s);
5162 int elements = vsz >> a->esz;
5163 TCGv_i64 addr = new_tmp_a64(s);
5164
5165 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5166 (a->imm * elements * (a->nreg + 1)) << a->msz);
5167 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5168 }
5169 return true;
5170}
f6dbf62a
RH
5171
5172/*
5173 *** SVE gather loads / scatter stores
5174 */
5175
500d0484 5176static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5177 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5178 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5179{
5180 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5181 TCGv_ptr t_zm = tcg_temp_new_ptr();
5182 TCGv_ptr t_pg = tcg_temp_new_ptr();
5183 TCGv_ptr t_zt = tcg_temp_new_ptr();
500d0484 5184 TCGv_i32 t_desc;
d28d12f0 5185 int desc = 0;
500d0484 5186
d28d12f0
RH
5187 if (s->mte_active[0]) {
5188 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5189 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5190 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5191 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
5192 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
5193 desc <<= SVE_MTEDESC_SHIFT;
5194 }
cdecb3fc 5195 desc = simd_desc(vsz, vsz, desc | scale);
500d0484 5196 t_desc = tcg_const_i32(desc);
f6dbf62a
RH
5197
5198 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5199 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5200 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
500d0484 5201 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
f6dbf62a
RH
5202
5203 tcg_temp_free_ptr(t_zt);
5204 tcg_temp_free_ptr(t_zm);
5205 tcg_temp_free_ptr(t_pg);
500d0484 5206 tcg_temp_free_i32(t_desc);
f6dbf62a
RH
5207}
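/*
 * As with do_mem_zpa, the gather/scatter descriptor packs any MTE fields
 * above SVE_MTEDESC_SHIFT and keeps the offset scale in the low
 * simd_data bits; the scalar base is passed as a separate argument while
 * the per-element offsets come from the Zm vector.
 */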
5208
d28d12f0
RH
5209/* Indexed by [mte][be][ff][xs][u][msz]. */
5210static gen_helper_gvec_mem_scatter * const
5211gather_load_fn32[2][2][2][2][2][3] = {
5212 { /* MTE Inactive */
5213 { /* Little-endian */
5214 { { { gen_helper_sve_ldbss_zsu,
5215 gen_helper_sve_ldhss_le_zsu,
5216 NULL, },
5217 { gen_helper_sve_ldbsu_zsu,
5218 gen_helper_sve_ldhsu_le_zsu,
5219 gen_helper_sve_ldss_le_zsu, } },
5220 { { gen_helper_sve_ldbss_zss,
5221 gen_helper_sve_ldhss_le_zss,
5222 NULL, },
5223 { gen_helper_sve_ldbsu_zss,
5224 gen_helper_sve_ldhsu_le_zss,
5225 gen_helper_sve_ldss_le_zss, } } },
5226
5227 /* First-fault */
5228 { { { gen_helper_sve_ldffbss_zsu,
5229 gen_helper_sve_ldffhss_le_zsu,
5230 NULL, },
5231 { gen_helper_sve_ldffbsu_zsu,
5232 gen_helper_sve_ldffhsu_le_zsu,
5233 gen_helper_sve_ldffss_le_zsu, } },
5234 { { gen_helper_sve_ldffbss_zss,
5235 gen_helper_sve_ldffhss_le_zss,
5236 NULL, },
5237 { gen_helper_sve_ldffbsu_zss,
5238 gen_helper_sve_ldffhsu_le_zss,
5239 gen_helper_sve_ldffss_le_zss, } } } },
5240
5241 { /* Big-endian */
5242 { { { gen_helper_sve_ldbss_zsu,
5243 gen_helper_sve_ldhss_be_zsu,
5244 NULL, },
5245 { gen_helper_sve_ldbsu_zsu,
5246 gen_helper_sve_ldhsu_be_zsu,
5247 gen_helper_sve_ldss_be_zsu, } },
5248 { { gen_helper_sve_ldbss_zss,
5249 gen_helper_sve_ldhss_be_zss,
5250 NULL, },
5251 { gen_helper_sve_ldbsu_zss,
5252 gen_helper_sve_ldhsu_be_zss,
5253 gen_helper_sve_ldss_be_zss, } } },
5254
5255 /* First-fault */
5256 { { { gen_helper_sve_ldffbss_zsu,
5257 gen_helper_sve_ldffhss_be_zsu,
5258 NULL, },
5259 { gen_helper_sve_ldffbsu_zsu,
5260 gen_helper_sve_ldffhsu_be_zsu,
5261 gen_helper_sve_ldffss_be_zsu, } },
5262 { { gen_helper_sve_ldffbss_zss,
5263 gen_helper_sve_ldffhss_be_zss,
5264 NULL, },
5265 { gen_helper_sve_ldffbsu_zss,
5266 gen_helper_sve_ldffhsu_be_zss,
5267 gen_helper_sve_ldffss_be_zss, } } } } },
5268 { /* MTE Active */
5269 { /* Little-endian */
5270 { { { gen_helper_sve_ldbss_zsu_mte,
5271 gen_helper_sve_ldhss_le_zsu_mte,
5272 NULL, },
5273 { gen_helper_sve_ldbsu_zsu_mte,
5274 gen_helper_sve_ldhsu_le_zsu_mte,
5275 gen_helper_sve_ldss_le_zsu_mte, } },
5276 { { gen_helper_sve_ldbss_zss_mte,
5277 gen_helper_sve_ldhss_le_zss_mte,
5278 NULL, },
5279 { gen_helper_sve_ldbsu_zss_mte,
5280 gen_helper_sve_ldhsu_le_zss_mte,
5281 gen_helper_sve_ldss_le_zss_mte, } } },
5282
5283 /* First-fault */
5284 { { { gen_helper_sve_ldffbss_zsu_mte,
5285 gen_helper_sve_ldffhss_le_zsu_mte,
5286 NULL, },
5287 { gen_helper_sve_ldffbsu_zsu_mte,
5288 gen_helper_sve_ldffhsu_le_zsu_mte,
5289 gen_helper_sve_ldffss_le_zsu_mte, } },
5290 { { gen_helper_sve_ldffbss_zss_mte,
5291 gen_helper_sve_ldffhss_le_zss_mte,
5292 NULL, },
5293 { gen_helper_sve_ldffbsu_zss_mte,
5294 gen_helper_sve_ldffhsu_le_zss_mte,
5295 gen_helper_sve_ldffss_le_zss_mte, } } } },
5296
5297 { /* Big-endian */
5298 { { { gen_helper_sve_ldbss_zsu_mte,
5299 gen_helper_sve_ldhss_be_zsu_mte,
5300 NULL, },
5301 { gen_helper_sve_ldbsu_zsu_mte,
5302 gen_helper_sve_ldhsu_be_zsu_mte,
5303 gen_helper_sve_ldss_be_zsu_mte, } },
5304 { { gen_helper_sve_ldbss_zss_mte,
5305 gen_helper_sve_ldhss_be_zss_mte,
5306 NULL, },
5307 { gen_helper_sve_ldbsu_zss_mte,
5308 gen_helper_sve_ldhsu_be_zss_mte,
5309 gen_helper_sve_ldss_be_zss_mte, } } },
5310
5311 /* First-fault */
5312 { { { gen_helper_sve_ldffbss_zsu_mte,
5313 gen_helper_sve_ldffhss_be_zsu_mte,
5314 NULL, },
5315 { gen_helper_sve_ldffbsu_zsu_mte,
5316 gen_helper_sve_ldffhsu_be_zsu_mte,
5317 gen_helper_sve_ldffss_be_zsu_mte, } },
5318 { { gen_helper_sve_ldffbss_zss_mte,
5319 gen_helper_sve_ldffhss_be_zss_mte,
5320 NULL, },
5321 { gen_helper_sve_ldffbsu_zss_mte,
5322 gen_helper_sve_ldffhsu_be_zss_mte,
5323 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5324};
5325
5326/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5327static gen_helper_gvec_mem_scatter * const
5328gather_load_fn64[2][2][2][3][2][4] = {
5329 { /* MTE Inactive */
5330 { /* Little-endian */
5331 { { { gen_helper_sve_ldbds_zsu,
5332 gen_helper_sve_ldhds_le_zsu,
5333 gen_helper_sve_ldsds_le_zsu,
5334 NULL, },
5335 { gen_helper_sve_ldbdu_zsu,
5336 gen_helper_sve_ldhdu_le_zsu,
5337 gen_helper_sve_ldsdu_le_zsu,
5338 gen_helper_sve_lddd_le_zsu, } },
5339 { { gen_helper_sve_ldbds_zss,
5340 gen_helper_sve_ldhds_le_zss,
5341 gen_helper_sve_ldsds_le_zss,
5342 NULL, },
5343 { gen_helper_sve_ldbdu_zss,
5344 gen_helper_sve_ldhdu_le_zss,
5345 gen_helper_sve_ldsdu_le_zss,
5346 gen_helper_sve_lddd_le_zss, } },
5347 { { gen_helper_sve_ldbds_zd,
5348 gen_helper_sve_ldhds_le_zd,
5349 gen_helper_sve_ldsds_le_zd,
5350 NULL, },
5351 { gen_helper_sve_ldbdu_zd,
5352 gen_helper_sve_ldhdu_le_zd,
5353 gen_helper_sve_ldsdu_le_zd,
5354 gen_helper_sve_lddd_le_zd, } } },
5355
5356 /* First-fault */
5357 { { { gen_helper_sve_ldffbds_zsu,
5358 gen_helper_sve_ldffhds_le_zsu,
5359 gen_helper_sve_ldffsds_le_zsu,
5360 NULL, },
5361 { gen_helper_sve_ldffbdu_zsu,
5362 gen_helper_sve_ldffhdu_le_zsu,
5363 gen_helper_sve_ldffsdu_le_zsu,
5364 gen_helper_sve_ldffdd_le_zsu, } },
5365 { { gen_helper_sve_ldffbds_zss,
5366 gen_helper_sve_ldffhds_le_zss,
5367 gen_helper_sve_ldffsds_le_zss,
5368 NULL, },
5369 { gen_helper_sve_ldffbdu_zss,
5370 gen_helper_sve_ldffhdu_le_zss,
5371 gen_helper_sve_ldffsdu_le_zss,
5372 gen_helper_sve_ldffdd_le_zss, } },
5373 { { gen_helper_sve_ldffbds_zd,
5374 gen_helper_sve_ldffhds_le_zd,
5375 gen_helper_sve_ldffsds_le_zd,
5376 NULL, },
5377 { gen_helper_sve_ldffbdu_zd,
5378 gen_helper_sve_ldffhdu_le_zd,
5379 gen_helper_sve_ldffsdu_le_zd,
5380 gen_helper_sve_ldffdd_le_zd, } } } },
5381 { /* Big-endian */
5382 { { { gen_helper_sve_ldbds_zsu,
5383 gen_helper_sve_ldhds_be_zsu,
5384 gen_helper_sve_ldsds_be_zsu,
5385 NULL, },
5386 { gen_helper_sve_ldbdu_zsu,
5387 gen_helper_sve_ldhdu_be_zsu,
5388 gen_helper_sve_ldsdu_be_zsu,
5389 gen_helper_sve_lddd_be_zsu, } },
5390 { { gen_helper_sve_ldbds_zss,
5391 gen_helper_sve_ldhds_be_zss,
5392 gen_helper_sve_ldsds_be_zss,
5393 NULL, },
5394 { gen_helper_sve_ldbdu_zss,
5395 gen_helper_sve_ldhdu_be_zss,
5396 gen_helper_sve_ldsdu_be_zss,
5397 gen_helper_sve_lddd_be_zss, } },
5398 { { gen_helper_sve_ldbds_zd,
5399 gen_helper_sve_ldhds_be_zd,
5400 gen_helper_sve_ldsds_be_zd,
5401 NULL, },
5402 { gen_helper_sve_ldbdu_zd,
5403 gen_helper_sve_ldhdu_be_zd,
5404 gen_helper_sve_ldsdu_be_zd,
5405 gen_helper_sve_lddd_be_zd, } } },
5406
5407 /* First-fault */
5408 { { { gen_helper_sve_ldffbds_zsu,
5409 gen_helper_sve_ldffhds_be_zsu,
5410 gen_helper_sve_ldffsds_be_zsu,
5411 NULL, },
5412 { gen_helper_sve_ldffbdu_zsu,
5413 gen_helper_sve_ldffhdu_be_zsu,
5414 gen_helper_sve_ldffsdu_be_zsu,
5415 gen_helper_sve_ldffdd_be_zsu, } },
5416 { { gen_helper_sve_ldffbds_zss,
5417 gen_helper_sve_ldffhds_be_zss,
5418 gen_helper_sve_ldffsds_be_zss,
5419 NULL, },
5420 { gen_helper_sve_ldffbdu_zss,
5421 gen_helper_sve_ldffhdu_be_zss,
5422 gen_helper_sve_ldffsdu_be_zss,
5423 gen_helper_sve_ldffdd_be_zss, } },
5424 { { gen_helper_sve_ldffbds_zd,
5425 gen_helper_sve_ldffhds_be_zd,
5426 gen_helper_sve_ldffsds_be_zd,
5427 NULL, },
5428 { gen_helper_sve_ldffbdu_zd,
5429 gen_helper_sve_ldffhdu_be_zd,
5430 gen_helper_sve_ldffsdu_be_zd,
5431 gen_helper_sve_ldffdd_be_zd, } } } } },
5432 { /* MTE Active */
5433 { /* Little-endian */
5434 { { { gen_helper_sve_ldbds_zsu_mte,
5435 gen_helper_sve_ldhds_le_zsu_mte,
5436 gen_helper_sve_ldsds_le_zsu_mte,
5437 NULL, },
5438 { gen_helper_sve_ldbdu_zsu_mte,
5439 gen_helper_sve_ldhdu_le_zsu_mte,
5440 gen_helper_sve_ldsdu_le_zsu_mte,
5441 gen_helper_sve_lddd_le_zsu_mte, } },
5442 { { gen_helper_sve_ldbds_zss_mte,
5443 gen_helper_sve_ldhds_le_zss_mte,
5444 gen_helper_sve_ldsds_le_zss_mte,
5445 NULL, },
5446 { gen_helper_sve_ldbdu_zss_mte,
5447 gen_helper_sve_ldhdu_le_zss_mte,
5448 gen_helper_sve_ldsdu_le_zss_mte,
5449 gen_helper_sve_lddd_le_zss_mte, } },
5450 { { gen_helper_sve_ldbds_zd_mte,
5451 gen_helper_sve_ldhds_le_zd_mte,
5452 gen_helper_sve_ldsds_le_zd_mte,
5453 NULL, },
5454 { gen_helper_sve_ldbdu_zd_mte,
5455 gen_helper_sve_ldhdu_le_zd_mte,
5456 gen_helper_sve_ldsdu_le_zd_mte,
5457 gen_helper_sve_lddd_le_zd_mte, } } },
5458
5459 /* First-fault */
5460 { { { gen_helper_sve_ldffbds_zsu_mte,
5461 gen_helper_sve_ldffhds_le_zsu_mte,
5462 gen_helper_sve_ldffsds_le_zsu_mte,
5463 NULL, },
5464 { gen_helper_sve_ldffbdu_zsu_mte,
5465 gen_helper_sve_ldffhdu_le_zsu_mte,
5466 gen_helper_sve_ldffsdu_le_zsu_mte,
5467 gen_helper_sve_ldffdd_le_zsu_mte, } },
5468 { { gen_helper_sve_ldffbds_zss_mte,
5469 gen_helper_sve_ldffhds_le_zss_mte,
5470 gen_helper_sve_ldffsds_le_zss_mte,
5471 NULL, },
5472 { gen_helper_sve_ldffbdu_zss_mte,
5473 gen_helper_sve_ldffhdu_le_zss_mte,
5474 gen_helper_sve_ldffsdu_le_zss_mte,
5475 gen_helper_sve_ldffdd_le_zss_mte, } },
5476 { { gen_helper_sve_ldffbds_zd_mte,
5477 gen_helper_sve_ldffhds_le_zd_mte,
5478 gen_helper_sve_ldffsds_le_zd_mte,
5479 NULL, },
5480 { gen_helper_sve_ldffbdu_zd_mte,
5481 gen_helper_sve_ldffhdu_le_zd_mte,
5482 gen_helper_sve_ldffsdu_le_zd_mte,
5483 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5484 { /* Big-endian */
5485 { { { gen_helper_sve_ldbds_zsu_mte,
5486 gen_helper_sve_ldhds_be_zsu_mte,
5487 gen_helper_sve_ldsds_be_zsu_mte,
5488 NULL, },
5489 { gen_helper_sve_ldbdu_zsu_mte,
5490 gen_helper_sve_ldhdu_be_zsu_mte,
5491 gen_helper_sve_ldsdu_be_zsu_mte,
5492 gen_helper_sve_lddd_be_zsu_mte, } },
5493 { { gen_helper_sve_ldbds_zss_mte,
5494 gen_helper_sve_ldhds_be_zss_mte,
5495 gen_helper_sve_ldsds_be_zss_mte,
5496 NULL, },
5497 { gen_helper_sve_ldbdu_zss_mte,
5498 gen_helper_sve_ldhdu_be_zss_mte,
5499 gen_helper_sve_ldsdu_be_zss_mte,
5500 gen_helper_sve_lddd_be_zss_mte, } },
5501 { { gen_helper_sve_ldbds_zd_mte,
5502 gen_helper_sve_ldhds_be_zd_mte,
5503 gen_helper_sve_ldsds_be_zd_mte,
5504 NULL, },
5505 { gen_helper_sve_ldbdu_zd_mte,
5506 gen_helper_sve_ldhdu_be_zd_mte,
5507 gen_helper_sve_ldsdu_be_zd_mte,
5508 gen_helper_sve_lddd_be_zd_mte, } } },
5509
5510 /* First-fault */
5511 { { { gen_helper_sve_ldffbds_zsu_mte,
5512 gen_helper_sve_ldffhds_be_zsu_mte,
5513 gen_helper_sve_ldffsds_be_zsu_mte,
5514 NULL, },
5515 { gen_helper_sve_ldffbdu_zsu_mte,
5516 gen_helper_sve_ldffhdu_be_zsu_mte,
5517 gen_helper_sve_ldffsdu_be_zsu_mte,
5518 gen_helper_sve_ldffdd_be_zsu_mte, } },
5519 { { gen_helper_sve_ldffbds_zss_mte,
5520 gen_helper_sve_ldffhds_be_zss_mte,
5521 gen_helper_sve_ldffsds_be_zss_mte,
5522 NULL, },
5523 { gen_helper_sve_ldffbdu_zss_mte,
5524 gen_helper_sve_ldffhdu_be_zss_mte,
5525 gen_helper_sve_ldffsdu_be_zss_mte,
5526 gen_helper_sve_ldffdd_be_zss_mte, } },
5527 { { gen_helper_sve_ldffbds_zd_mte,
5528 gen_helper_sve_ldffhds_be_zd_mte,
5529 gen_helper_sve_ldffsds_be_zd_mte,
5530 NULL, },
5531 { gen_helper_sve_ldffbdu_zd_mte,
5532 gen_helper_sve_ldffhdu_be_zd_mte,
5533 gen_helper_sve_ldffsdu_be_zd_mte,
5534 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
5535};
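/*
 * Indexing example for the table above: a little-endian, non-MTE,
 * non-first-fault LD1D gather with 64-bit vector offsets uses xs
 * overloaded to 2, u == 1 and msz == 3, so
 * gather_load_fn64[0][0][0][2][1][3] selects gen_helper_sve_lddd_le_zd.
 */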
5536
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, false, fn);
    return true;
}

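/*
 * Illustrative sketch only (not part of the build): the scaled-offset
 * gather forms shift each vector offset element left by msz when the
 * scale bit is set, which is why the translator above hands
 * a->scale * a->msz to do_mem_zpz().  The function and names below are
 * hypothetical, standalone C used purely to show that arithmetic.
 */
#if 0
#include <stdint.h>

static uint64_t gather_elem_addr(uint64_t base, uint64_t offset,
                                 int scale, int msz)
{
    /* scale is 0 or 1; msz is log2 of the access size in bytes. */
    return base + (offset << (scale * msz));
}

/* Example: LD1D with scaled 64-bit offsets has msz = 3, so element i is
 * accessed at base + zm[i] * 8; unscaled forms use the offset as bytes. */
#endif
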
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];
    TCGv_i64 imm;

    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
    tcg_temp_free_i64(imm);
    return true;
}

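/*
 * Sketch only: the vector-plus-immediate form above reuses the
 * scalar-plus-vector path by folding the immediate into the scalar
 * argument.  The immediate counts accesses of the memory element size,
 * so it is converted to a byte offset with a shift by msz, as in the
 * tcg_const_i64(a->imm << a->msz) above.  The helper below is
 * hypothetical, standalone C that just restates that conversion.
 */
#if 0
#include <stdint.h>

static int64_t zpiz_byte_offset(int64_t imm, int msz)
{
    /* e.g. imm = 31 doublewords, msz = 3  ->  248 bytes */
    return imm << msz;
}
#endif
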
/* Indexed by [mte][be][xs][msz]. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};

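/*
 * Sketch (an assumption drawn from the helper-name suffixes, not stated
 * in this file): the xs index distinguishes how each vector offset
 * element is interpreted before being added to the base -- _zsu helpers
 * zero-extend a 32-bit offset, _zss helpers sign-extend it, and _zd
 * helpers (xs == 2 in the 64-bit tables) use the full 64-bit element.
 * Standalone, illustrative C only.
 */
#if 0
#include <stdint.h>

static uint64_t extend_offset(uint64_t elem, int xs)
{
    switch (xs) {
    case 0:
        return (uint32_t)elem;            /* zsu: zero-extend 32 bits */
    case 1:
        return (uint64_t)(int32_t)elem;   /* zss: sign-extend 32 bits */
    default:
        return elem;                      /* zd: full 64-bit offset */
    }
}
#endif
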
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}

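/*
 * The decode-time check above, restated as a standalone predicate for
 * clarity (illustrative sketch, not QEMU API): a scatter store is
 * rejected when the memory access size exceeds the vector element size,
 * or when a byte store claims a scaled offset, since a byte access has
 * nothing to scale by.
 */
#if 0
#include <stdbool.h>

static bool st1_zprz_encoding_valid(int esz, int msz, int scale)
{
    return !(esz < msz || (msz == 0 && scale));
}

/* Examples: esz = 2 (32-bit), msz = 1 (16-bit), scale = 1 -> valid;
 *           esz = 2 (32-bit), msz = 3 (64-bit), scale = 0 -> rejected. */
#endif
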
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];
    TCGv_i64 imm;

    if (a->esz < a->msz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
    tcg_temp_free_i64(imm);
    return true;
}

/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}

/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}

static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
    }
    return true;
}

static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
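
/*
 * Standalone sketch (not built) of the element-wise semantics the two
 * predicated forms above emit via do_sel_z() and do_movz_zpz(): active
 * lanes take the source register, inactive lanes either keep the old
 * destination (merging) or become zero (zeroing).  Types and names here
 * are illustrative, not QEMU's.
 */
#if 0
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static void movprfx_merging(uint64_t *zd, const uint64_t *zn,
                            const bool *pg, size_t elems)
{
    for (size_t i = 0; i < elems; i++) {
        if (pg[i]) {
            zd[i] = zn[i];            /* merging: inactive lanes unchanged */
        }
    }
}

static void movprfx_zeroing(uint64_t *zd, const uint64_t *zn,
                            const bool *pg, size_t elems)
{
    for (size_t i = 0; i < elems; i++) {
        zd[i] = pg[i] ? zn[i] : 0;    /* zeroing: inactive lanes cleared */
    }
}
#endif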