]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Introduce gen_gvec_ool_arg_zzz
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
/* Gvec expander taking a 64-bit scalar operand. */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Out-of-line helpers that also return condition flags in a TCGv_i32. */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Out-of-line helpers for contiguous and scatter/gather memory operations. */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 * The element size is encoded by the position of the highest set bit
 * of the tsz field once the low imm3 bits are discarded.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    /* 31 - clz32(x) is log2 of the top set bit; clz32(0) == 32 gives -1. */
    return 31 - clz32(x);
}
61
451e4ffd 62static int tszimm_shr(DisasContext *s, int x)
ccd841c3 63{
451e4ffd 64 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
65}
66
67/* See e.g. LSL (immediate, predicated). */
451e4ffd 68static int tszimm_shl(DisasContext *s, int x)
ccd841c3 69{
451e4ffd 70 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
71}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
c4e7c493
RH
84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
86 */
451e4ffd 87static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
88{
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
91}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    /* sve_len is the vector length in bytes; one predicate bit per byte. */
    return s->sve_len >> 3;
}
116
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    return size <= 8 ? 8 : QEMU_ALIGN_UP(size, 16);
}
133
/* Return the predicate register size rounded up for use with tcg gvec. */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
138
/* Invoke an out-of-line helper on 2 Zregs.
 * Returns false (unallocated encoding) when FN is null.
 */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* As gen_gvec_ool_zzz, taking the registers from the decoded arg struct. */
static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}
177
/* Invoke an out-of-line helper on 4 Zregs.
 * Unlike the 2/3-Zreg forms above, the caller is responsible for
 * the null-helper and SVE access checks.
 */
static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, data, fn);
}
f7d79c41 212
/* Invoke a vector expander on two Zregs. */
static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
                           int esz, int rd, int rn)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn), vsz, vsz);
}

/* Invoke a vector expander on three Zregs. */
static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vsz, vsz);
}

/* Invoke a vector expander on four Zregs. */
static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                             int esz, int rd, int rn, int rm, int ra)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm),
            vec_full_reg_offset(s, ra), vsz, vsz);
}
242
/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        /* A byte-wise copy is size-agnostic, so MO_8 suffices. */
        gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}
258
/* Invoke a vector expander on three Pregs.
 * Predicate registers use the gvec-rounded size (see size_for_gvec).
 */
static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    unsigned psz = pred_gvec_reg_size(s);
    gvec_fn(MO_64, pred_full_reg_offset(s, rd),
            pred_full_reg_offset(s, rn),
            pred_full_reg_offset(s, rm), psz, psz);
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}
279
/* Set the cpu flags as per a return from an SVE helper.
 * The helper packs its result as: bit 0 -> C, bit 1 -> !Z, with the
 * whole word feeding N (in QEMU's delayed-flag representation cpu_ZF
 * is zero iff the Z flag is set).  V is always cleared.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */

/* PredTest over a single 64-bit predicate word. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* PredTest over WORDS predicate words at env offsets DOFS (data)
 * and GOFS (governing predicate).
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
315
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,   /* MO_8, MO_16 */
    0x1111111111111111ull, 0x0101010101010101ull    /* MO_32, MO_64 */
};
321
39eea561
RH
322/*
323 *** SVE Logical - Unpredicated Group
324 */
325
/* Expand an unpredicated three-Zreg operation, gated by the SVE access
 * check.  Always returns true: the encoding itself is valid.
 */
static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
{
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_and);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_or);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}
d1822297 353
/* XAR on 8-bit lanes packed in a 64-bit word: rotate (n ^ m) right
 * by SH within each byte, using shift/mask on the whole word.
 */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);        /* low part of each rotated lane */
    tcg_gen_shli_i64(t, t, 8 - sh);    /* high part of each rotated lane */
    tcg_gen_andi_i64(d, d, mask);      /* drop bits shifted in from the lane above */
    tcg_gen_andi_i64(t, t, ~mask);     /* drop bits shifted out of the lane below */
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

/* As gen_xar8_i64, for 16-bit lanes. */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

/* XAR on a full 32-bit element: xor then rotate right. */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

/* XAR on a full 64-bit element. */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

/* XAR using host vector ops, any element size. */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
400
/* Expand XAR (xor and rotate right) for both SVE2 and AdvSIMD;
 * hence non-static and the shared gvec_xar_d helper for MO_64.
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;     /* a rotate by esize is a rotate by 0 */

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}
457
/* Expand an SVE2 four-Zreg operation, gated on the SVE2 feature
 * and the SVE access check.
 */
static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
    }
    return true;
}
468
/* EOR3: d = n ^ m ^ k. */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* Bitwise op: a single MO_64 expansion covers all element sizes. */
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_eor3);
}

/* BCAX: d = n ^ (m & ~k). */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bcax);
}

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl);
}
542
/* BSL1N: d = (~n & k) | (m & ~k).
 * NOTE: n and m hold scratch copies of the inputs here and are clobbered.
 */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* Invert the first operand, then select as usual. */
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl1n);
}

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* Invert the second operand, then select as usual. */
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl2n);
}
627
/* NBSL: d = ~((n & k) | (m & ~k)), i.e. inverted bit select.
 * NOTE: n and m hold scratch copies of the inputs here and are clobbered.
 */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_nbsl);
}
659
fea98f9c
RH
660/*
661 *** SVE Integer Arithmetic - Unpredicated Group
662 */
663
/* Unpredicated integer add/sub and saturating add/sub, all via
 * generic gvec expanders.
 */
static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_add);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}
693
f97cfd59
RH
694/*
695 *** SVE Integer Arithmetic - Binary Predicated Group
696 */
697
/* Expand a predicated three-Zreg operation via an out-of-line helper;
 * returns false (unallocated encoding) when FN is null.
 */
static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
    }
    return true;
}

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
720
/* Expand a predicated Zd = OP(Zn, Zm) insn for all four element sizes. */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

/* SDIV/UDIV exist only for 32-bit and 64-bit elements. */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ
779
afac6d04
RH
780/*
781 *** SVE Integer Arithmetic - Unary Predicated Group
782 */
783
/* Expand a predicated unary (Zd, Zn, Pg) operation via an out-of-line
 * helper; returns false (unallocated encoding) when FN is null.
 */
static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
    }
    return true;
}

/* Expand a predicated unary Zreg insn for all four element sizes. */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
812
/* FABS/FNEG have no 8-bit element form. */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Sign/zero extensions exist only for elements wider than the
 * source width, hence the leading NULL entries.
 */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
888
047cec97
RH
889/*
890 *** SVE Integer Reduction Group
891 */
892
typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Reduce vector Zn under predicate Pg to a scalar, written to the
 * FP/SIMD destination register Vd; returns false when FN is null.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

/* Expand a predicated reduction for all four element sizes. */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_reduc * const fns[4] = {                 \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_vpz_ool(s, a, fns[a->esz]);                           \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV has no 64-bit element form. */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
955
ccd841c3
RH
956/*
957 *** SVE Shift by Immediate - Predicated Group
958 */
959
60245996
RH
/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };

    if (sve_access_check(s)) {
        /* The invert flag is passed to the helper via the data field. */
        gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
    }
    return true;
}

/* Expand a predicated shift-by-immediate; a->imm is passed to the
 * helper via the data field.
 */
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
    }
    return true;
}
986
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
1056
a5421b54
SL
1057static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1058{
1059 static gen_helper_gvec_3 * const fns[4] = {
1060 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
1061 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
1062 };
1063 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1064 return false;
1065 }
1066 return do_zpzi_ool(s, a, fns[a->esz]);
1067}
1068
/* UQSHL (immediate, predicated): unsigned saturating left shift.  SVE2 only. */
static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
        gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}
1080
/* SRSHR: signed rounding shift right by immediate, predicated.  SVE2 only. */
static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
        gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}
1092
/* URSHR: unsigned rounding shift right by immediate, predicated.  SVE2 only. */
static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
        gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}
1104
/* SQSHLU: signed saturating shift left, unsigned result.  SVE2 only. */
static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
        gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}
1116
fe7f8dfb
RH
1117/*
1118 *** SVE Bitwise Shift - Predicated Group
1119 */
1120
/*
 * Predicated shifts by a wide (64-bit) element shift count.
 * Only B/H/S element sizes exist (a 64-bit element shifted by a 64-bit
 * count is the ordinary predicated shift), hence fns[3] and the esz < 3
 * guard.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1139
d9d78dcc
RH
1140/*
1141 *** SVE Bitwise Shift - Unpredicated Group
1142 */
1143
/*
 * Common expansion for unpredicated shift-by-immediate.
 * @asr distinguishes arithmetic right shift, whose out-of-range immediate
 * clamps rather than zeroes.  Note that a->imm may be rewritten in place
 * for the clamped ASR case before being passed to @gvec_fn.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
1170
/* ASR (immediate, unpredicated). */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}
1175
/* LSR (immediate, unpredicated). */
static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}
1180
/* LSL (immediate, unpredicated). */
static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
1185
/*
 * Unpredicated shifts by a wide (64-bit) element shift count.
 * No D form exists, hence the NULL fns[3] entry; presumably
 * gen_gvec_ool_arg_zzz rejects a NULL fn -- confirm in its definition.
 */
#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, 0);                    \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
1201
96a36e4a
RH
1202/*
1203 *** SVE Integer Multiply-Add Group
1204 */
1205
/*
 * Expand a predicated multiply-add style operation (rd, ra, rn, rm, pg)
 * via an out-of-line helper.  Always returns true (insn accepted).
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
1220
/* Per-element-size dispatch for the predicated multiply-add group. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
1235
9a56c9c3
RH
1236/*
1237 *** SVE Index Generation Group
1238 */
1239
/*
 * Expand INDEX: Zd.e[i] = start + i * incr.
 * The 64-bit form passes the operands through directly; narrower forms
 * truncate them to 32 bits first since the helpers take TCGv_i32.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* Truncate the 64-bit inputs for the narrow helpers. */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
}
1269
3a7be554 1270static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
9a56c9c3
RH
1271{
1272 if (sve_access_check(s)) {
b0c3aece
RH
1273 TCGv_i64 start = tcg_constant_i64(a->imm1);
1274 TCGv_i64 incr = tcg_constant_i64(a->imm2);
9a56c9c3 1275 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1276 }
1277 return true;
1278}
1279
3a7be554 1280static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
9a56c9c3
RH
1281{
1282 if (sve_access_check(s)) {
b0c3aece 1283 TCGv_i64 start = tcg_constant_i64(a->imm);
9a56c9c3
RH
1284 TCGv_i64 incr = cpu_reg(s, a->rm);
1285 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1286 }
1287 return true;
1288}
1289
3a7be554 1290static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
9a56c9c3
RH
1291{
1292 if (sve_access_check(s)) {
1293 TCGv_i64 start = cpu_reg(s, a->rn);
b0c3aece 1294 TCGv_i64 incr = tcg_constant_i64(a->imm);
9a56c9c3 1295 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1296 }
1297 return true;
1298}
1299
3a7be554 1300static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
9a56c9c3
RH
1301{
1302 if (sve_access_check(s)) {
1303 TCGv_i64 start = cpu_reg(s, a->rn);
1304 TCGv_i64 incr = cpu_reg(s, a->rm);
1305 do_index(s, a->esz, a->rd, start, incr);
1306 }
1307 return true;
1308}
1309
96f922cc
RH
1310/*
1311 *** SVE Stack Allocation Group
1312 */
1313
3a7be554 1314static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1315{
5de56742
AC
1316 if (sve_access_check(s)) {
1317 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1318 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1319 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1320 }
96f922cc
RH
1321 return true;
1322}
1323
3a7be554 1324static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1325{
5de56742
AC
1326 if (sve_access_check(s)) {
1327 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1328 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1329 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1330 }
96f922cc
RH
1331 return true;
1332}
1333
3a7be554 1334static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1335{
5de56742
AC
1336 if (sve_access_check(s)) {
1337 TCGv_i64 reg = cpu_reg(s, a->rd);
1338 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1339 }
96f922cc
RH
1340 return true;
1341}
1342
4b242d9c
RH
1343/*
1344 *** SVE Compute Vector Address Group
1345 */
1346
/* Common expansion for the ADR family: a->imm is the index shift amount. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}
1351
/* ADR, packed 32-bit offsets. */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}
1356
/* ADR, packed 64-bit offsets. */
static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}
1361
/* ADR, sign-extended 32-bit offsets. */
static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}
1366
/* ADR, zero-extended 32-bit offsets. */
static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1371
0762cd42
RH
1372/*
1373 *** SVE Integer Misc - Unpredicated Group
1374 */
1375
0ea3cdbf
RH
/* FEXPA: no byte-sized form, so fns[0] is NULL for esz == MO_8
 * (presumably rejected downstream by gen_gvec_ool_zz -- confirm there). */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL,                   gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)
0762cd42 1382
/* FTSSEL: trigonometric select coefficient.  No byte-sized form. */
static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, 0);
}
1393
516e246a
RH
1394/*
1395 *** SVE Predicate Logical Operations Group
1396 */
1397
/*
 * Expand a predicate logical operation, optionally setting NZCV from the
 * result under the governing predicate (a->s).  The non-flag-setting path
 * is a plain gvec expansion; the flag-setting path either computes inline
 * for a single 64-bit predicate word or falls back to do_predtest.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        /* No flags: straightforward vector expansion. */
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1453
/* pd = (pn & pm) & pg, one 64-bit predicate word. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1459
/* Vector form of gen_and_pg_i64. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1466
/* AND (predicates), with special cases for register aliasing. */
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            /* pn & pn == pn, so this reduces to pn & pg. */
            if (a->pg == a->rn) {
                /* All three sources alias: plain move. */
                do_mov_p(s, a->rd, a->rn);
            } else {
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            /* The guard aliases an operand: (pn & pm) & pg == pn & pm. */
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
1494
/* pd = (pn & ~pm) & pg, one 64-bit predicate word. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1500
/* Vector form of gen_bic_pg_i64. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1507
/* BIC (predicates). */
static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* When the guard aliases pn, (pn & ~pm) & pg == pn & ~pm. */
    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}
1525
/* pd = (pn ^ pm) & pg, one 64-bit predicate word. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1531
/* Vector form of gen_eor_pg_i64. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1538
/* EOR (predicates). */
static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1549
3a7be554 1550static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1551{
516e246a
RH
1552 if (a->s) {
1553 return false;
516e246a 1554 }
d4bc6232
RH
1555 if (sve_access_check(s)) {
1556 unsigned psz = pred_gvec_reg_size(s);
1557 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1558 pred_full_reg_offset(s, a->pg),
1559 pred_full_reg_offset(s, a->rn),
1560 pred_full_reg_offset(s, a->rm), psz, psz);
1561 }
1562 return true;
516e246a
RH
1563}
1564
/* pd = (pn | pm) & pg, one 64-bit predicate word. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1570
/* Vector form of gen_orr_pg_i64. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1577
/* ORR (predicates). */
static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* All three sources alias: (pn | pn) & pn == pn, a plain move. */
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1592
/* pd = (pn | ~pm) & pg, one 64-bit predicate word. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1598
/* Vector form of gen_orn_pg_i64. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1605
/* ORN (predicates). */
static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1616
/* pd = ~(pn | pm) & pg, one 64-bit predicate word. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1622
/* Vector form of gen_nor_pg_i64. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1629
/* NOR (predicates). */
static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1640
/* pd = ~(pn & pm) & pg, one 64-bit predicate word. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1646
/* Vector form of gen_nand_pg_i64. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1653
/* NAND (predicates). */
static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1664
9e18d7a6
RH
1665/*
1666 *** SVE Predicate Misc Group
1667 */
1668
/* PTEST: set NZCV from Pn under the governing predicate Pg. */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Single predicate word: test inline. */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1692
028e2a7b
RH
/* See the ARM pseudocode DecodePredCount.
 *
 * Given the vector size in bytes (@fullsz), a 5-bit predicate-constraint
 * pattern and an element size, return the number of elements that would
 * be active, or 0 when the constraint cannot be met (or is #uimm5).
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        /* VL1..VL8 encode their own bound. */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    /* Fixed VL patterns yield 0 if the vector is too short. */
    return elements >= bound ? bound : 0;
}
1730
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Partial final word: mask off the inactive high bits. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* All stored words identical: try a vector dup. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Store full words, then the partial final word, then zeros. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1810
/* PTRUE / PTRUES. */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}
1815
/* SETFFR: set all FFR elements. */
static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}
1821
/* PFALSE: clear all elements of Pd. */
static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}
1827
/* RDFFR (predicated): Pd = FFR & Pg, optionally setting flags. */
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}
1839
/* RDFFR (unpredicated): Pd = FFR. */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}
1844
/* WRFFR: FFR = Pn. */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1849
/*
 * Common expansion for PFIRST/PNEXT: call an out-of-line helper that
 * updates Pd in place and returns the flag word, then set NZCV from it.
 * The descriptor packs the predicate size and element size.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    /* Copy the helper's returned flag word into NZCV. */
    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
1878
/* PFIRST: set the first active element of Pd. */
static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}
1883
/* PNEXT: advance to the next active element of Pd. */
static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1888
24e82e68
RH
1889/*
1890 *** SVE Element Count Group
1891 */
1892
/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisions we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtraction can only underflow; clamp from below. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Addition can only overflow; clamp from above. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1917
/* Similarly with 64-bit values.
 * With no wider type available, overflow is detected with bit tricks
 * and the result selected with movcond.  @val is known positive.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: underflow iff reg < val; clamp to 0. */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: overflow iff sum < reg; clamp to ~0. */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1963
/* Similarly with a vector and a scalar operand.
 * The narrow helpers only implement addition, so a subtraction is
 * expressed by negating the (positive) operand first.
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        /* 64-bit unsigned has a dedicated subtract helper; 64-bit signed
           cannot negate safely via a wider type, so negate the operand. */
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
2047
3a7be554 2048static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
2049{
2050 if (sve_access_check(s)) {
2051 unsigned fullsz = vec_full_reg_size(s);
2052 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2053 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2054 }
2055 return true;
2056}
2057
3a7be554 2058static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2059{
2060 if (sve_access_check(s)) {
2061 unsigned fullsz = vec_full_reg_size(s);
2062 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2063 int inc = numelem * a->imm * (a->d ? -1 : 1);
2064 TCGv_i64 reg = cpu_reg(s, a->rd);
2065
2066 tcg_gen_addi_i64(reg, reg, inc);
2067 }
2068 return true;
2069}
2070
/* SQINC/UQINC/SQDEC/UQDEC (scalar, 32-bit): saturate within 32 bits. */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        /* Nothing to add; just (un)sign-extend the 32-bit value. */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
2094
/* SQINC/UQINC/SQDEC/UQDEC (scalar, 64-bit): saturate within 64 bits. */
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* inc == 0 leaves the register unchanged. */
    if (inc != 0) {
        do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
2111
/* INC/DEC (vector): add or subtract the element count, non-saturating. */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        /* No byte-sized form. */
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        /* Adding zero: plain register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
2134
/* SQINC/UQINC/SQDEC/UQDEC (vector): saturating add/sub of element count. */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        /* No byte-sized form. */
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        /* Adding zero: plain register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
2155
e1fa1164
RH
2156/*
2157 *** SVE Bitwise Immediate Group
2158 */
2159
/*
 * Common expansion for unpredicated bitwise logical immediate:
 * decode the AArch64 logical-immediate bitmask (invalid encoding
 * rejects the insn), then apply @gvec_fn with the 64-bit mask.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}
2175
/* AND (immediate): bitwise AND with a DBM-encoded logical immediate.  */
static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

/* ORR (immediate).  */
static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

/* EOR (immediate).  */
static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

/*
 * DUPM: broadcast a DBM-encoded logical immediate to every element.
 * Reserved immediate encodings are rejected as undefined.
 */
static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
2204
f25a2361
RH
2205/*
2206 *** SVE Integer Wide Immediate - Predicated Group
2207 */
2208
2209/* Implement all merging copies. This is used for CPY (immediate),
2210 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2211 */
2212static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2213 TCGv_i64 val)
2214{
2215 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2216 static gen_cpy * const fns[4] = {
2217 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2218 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2219 };
2220 unsigned vsz = vec_full_reg_size(s);
c6a59b55 2221 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
f25a2361
RH
2222 TCGv_ptr t_zd = tcg_temp_new_ptr();
2223 TCGv_ptr t_zn = tcg_temp_new_ptr();
2224 TCGv_ptr t_pg = tcg_temp_new_ptr();
2225
2226 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2227 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2228 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2229
2230 fns[esz](t_zd, t_zn, t_pg, val, desc);
2231
2232 tcg_temp_free_ptr(t_zd);
2233 tcg_temp_free_ptr(t_zn);
2234 tcg_temp_free_ptr(t_pg);
f25a2361
RH
2235}
2236
/*
 * FCPY: copy a VFP-encoded floating-point immediate to active elements,
 * merging.  Byte elements are not valid for an FP immediate.
 */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}

/*
 * CPY (immediate), merging.
 * NOTE(review): insn bit 13 with esz==0 is rejected here -- presumably
 * a reserved encoding of the shifted-immediate form; confirm against
 * the decode file.
 */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    }
    return true;
}
2260
/*
 * CPY (immediate), zeroing: copy the immediate to active elements and
 * zero the inactive ones, via out-of-line helpers per element size.
 */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    /* Same reserved-encoding check as trans_CPY_m_i.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2280
b94f8f60
RH
2281/*
2282 *** SVE Permute Extract Group
2283 */
2284
/*
 * EXT: extract a vector from the concatenation Zm:Zn starting at byte
 * offset IMM.  An out-of-range IMM degenerates to a plain copy of Zn.
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;          /* bytes taken from Zn */
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Low part of the result comes from Zn[n_ofs..], high from Zm.  */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper; n_ofs rides in the desc.  */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2314
/* EXT (SVE1 form): Zd = extract from Zm:Zn at immediate byte offset.  */
static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
}

/* EXT (SVE2 constructive form): second source is Zn+1 (mod 32).  */
static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
}
2327
30562ab7
RH
2328/*
2329 *** SVE Permute - Unpredicated Group
2330 */
2331
/* DUP (scalar): broadcast a general register (SP allowed) to a vector.  */
static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}
2341
/*
 * DUP (indexed): broadcast element [index] of Zn.  The element size is
 * encoded as the position of the lowest set bit of imm, with the index
 * in the bits above it; imm with no low bits set is invalid.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2368
2369static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2370{
2371 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2372 static gen_insr * const fns[4] = {
2373 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2374 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2375 };
2376 unsigned vsz = vec_full_reg_size(s);
c6a59b55 2377 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
30562ab7
RH
2378 TCGv_ptr t_zd = tcg_temp_new_ptr();
2379 TCGv_ptr t_zn = tcg_temp_new_ptr();
2380
2381 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2382 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2383
2384 fns[a->esz](t_zd, t_zn, val, desc);
2385
2386 tcg_temp_free_ptr(t_zd);
2387 tcg_temp_free_ptr(t_zn);
30562ab7
RH
2388}
2389
/* INSR (SIMD&FP scalar): insert the low 64 bits of Vm at element 0.  */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* INSR (scalar): insert general register Xm at element 0.  */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
2408
/* REV (vector): reverse the order of elements, per element size.  */
static gen_helper_gvec_2 * const rev_fns[4] = {
    gen_helper_sve_rev_b, gen_helper_sve_rev_h,
    gen_helper_sve_rev_s, gen_helper_sve_rev_d
};
TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)

/* TBL: table lookup of Zn indexed by the elements of Zm.  */
static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, 0);
}
2423
/*
 * TBL (SVE2, two-source): table lookup over the register pair
 * Zn:Zn+1 (mod 32), indexed by Zm.
 */
static bool trans_TBL_sve2(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
        gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn,
                          (a->rn + 1) % 32, a->rm, 0);
    }
    return true;
}
2440
/* TBX (SVE2): table lookup with out-of-range indices merging from Zd.  */
static bool trans_TBX(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
        gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, 0);
}
2453
3a7be554 2454static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2455{
2456 static gen_helper_gvec_2 * const fns[4][2] = {
2457 { NULL, NULL },
2458 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2459 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2460 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2461 };
2462
2463 if (a->esz == 0) {
2464 return false;
2465 }
2466 if (sve_access_check(s)) {
2467 unsigned vsz = vec_full_reg_size(s);
2468 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2469 vec_full_reg_offset(s, a->rn)
2470 + (a->h ? vsz / 2 : 0),
2471 vsz, vsz, 0, fns[a->esz][a->u]);
2472 }
2473 return true;
2474}
2475
d731d8cb
RH
2476/*
2477 *** SVE Permute - Predicates Group
2478 */
2479
2480static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2481 gen_helper_gvec_3 *fn)
2482{
2483 if (!sve_access_check(s)) {
2484 return true;
2485 }
2486
2487 unsigned vsz = pred_full_reg_size(s);
2488
d731d8cb
RH
2489 TCGv_ptr t_d = tcg_temp_new_ptr();
2490 TCGv_ptr t_n = tcg_temp_new_ptr();
2491 TCGv_ptr t_m = tcg_temp_new_ptr();
f9b0fcce 2492 uint32_t desc = 0;
d731d8cb 2493
f9b0fcce
RH
2494 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2495 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2496 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2497
2498 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2499 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2500 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
d731d8cb 2501
c6a59b55 2502 fn(t_d, t_n, t_m, tcg_constant_i32(desc));
d731d8cb
RH
2503
2504 tcg_temp_free_ptr(t_d);
2505 tcg_temp_free_ptr(t_n);
2506 tcg_temp_free_ptr(t_m);
d731d8cb
RH
2507 return true;
2508}
2509
2510static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2511 gen_helper_gvec_2 *fn)
2512{
2513 if (!sve_access_check(s)) {
2514 return true;
2515 }
2516
2517 unsigned vsz = pred_full_reg_size(s);
2518 TCGv_ptr t_d = tcg_temp_new_ptr();
2519 TCGv_ptr t_n = tcg_temp_new_ptr();
70acaafe 2520 uint32_t desc = 0;
d731d8cb
RH
2521
2522 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2523 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2524
70acaafe
RH
2525 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2526 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2527 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb 2528
c6a59b55 2529 fn(t_d, t_n, tcg_constant_i32(desc));
d731d8cb 2530
d731d8cb
RH
2531 tcg_temp_free_ptr(t_d);
2532 tcg_temp_free_ptr(t_n);
2533 return true;
2534}
2535
/* ZIP1/ZIP2 (predicates): interleave low/high halves of Pn and Pm.  */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

/* UZP1/UZP2 (predicates): concatenate even/odd elements.  */
static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

/* TRN1/TRN2 (predicates): interleave even/odd element pairs.  */
static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

/* REV (predicate): reverse the order of predicate elements.  */
static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

/* PUNPKLO/PUNPKHI: unpack the low/high half of Pn to double width.  */
static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
2580
234b48e9
RH
2581/*
2582 *** SVE Permute - Interleaving Group
2583 */
2584
2585static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2586{
2587 static gen_helper_gvec_3 * const fns[4] = {
2588 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2589 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2590 };
2591
2592 if (sve_access_check(s)) {
2593 unsigned vsz = vec_full_reg_size(s);
2594 unsigned high_ofs = high ? vsz / 2 : 0;
2595 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2596 vec_full_reg_offset(s, a->rn) + high_ofs,
2597 vec_full_reg_offset(s, a->rm) + high_ofs,
2598 vsz, vsz, 0, fns[a->esz]);
2599 }
2600 return true;
2601}
2602
/* ZIP1/ZIP2 (vectors): low-half and high-half interleave.  */
static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}
2612
/*
 * ZIP1/ZIP2 (128-bit element, F64MM): interleave quadword elements.
 * The half offset is aligned down to a whole number of quadword pairs.
 */
static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, gen_helper_sve2_zip_q);
    }
    return true;
}
2628
/* ZIP1/ZIP2 (128-bit element) front ends.  */
static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, false);
}

static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, true);
}
2638
/* UZP1/UZP2 (vectors): the odd/even selection rides in the desc data.  */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return gen_gvec_ool_arg_zzz(s, uzp_fns[a->esz], a, 0);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
{
    /* Odd elements: start one element (in bytes) into the stream.  */
    return gen_gvec_ool_arg_zzz(s, uzp_fns[a->esz], a, 1 << a->esz);
}

/* UZP1/UZP2 (128-bit element, F64MM).  */
static bool trans_UZP1_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, gen_helper_sve2_uzp_q, a, 0);
}

static bool trans_UZP2_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    /* Odd quadwords: 16-byte offset.  */
    return gen_gvec_ool_arg_zzz(s, gen_helper_sve2_uzp_q, a, 16);
}
2669
/* TRN1/TRN2 (vectors): the odd/even selection rides in the desc data.  */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
{
    return gen_gvec_ool_arg_zzz(s, trn_fns[a->esz], a, 0);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
{
    /* Odd elements: one element (in bytes) of offset.  */
    return gen_gvec_ool_arg_zzz(s, trn_fns[a->esz], a, 1 << a->esz);
}

/* TRN1/TRN2 (128-bit element, F64MM).  */
static bool trans_TRN1_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, gen_helper_sve2_trn_q, a, 0);
}

static bool trans_TRN2_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    /* Odd quadwords: 16-byte offset.  */
    return gen_gvec_ool_arg_zzz(s, gen_helper_sve2_trn_q, a, 16);
}
2700
3ca879ae
RH
2701/*
2702 *** SVE Permute Vector - Predicated Group
2703 */
2704
/* COMPACT: pack active elements to the low end; word/dword only.  */
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2712
ef23cb72
RH
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size. This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_p);
}
2734
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two size: wrap with a mask.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise: conditionally reset to 0 when last >= vsz.  */
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}
2751
/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* Masking maps -1 (not found) onto the last element offset.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select the last element offset when last < 0.  */
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}
2765
2766/* Load an unsigned element of ESZ from BASE+OFS. */
2767static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2768{
2769 TCGv_i64 r = tcg_temp_new_i64();
2770
2771 switch (esz) {
2772 case 0:
2773 tcg_gen_ld8u_i64(r, base, ofs);
2774 break;
2775 case 1:
2776 tcg_gen_ld16u_i64(r, base, ofs);
2777 break;
2778 case 2:
2779 tcg_gen_ld32u_i64(r, base, ofs);
2780 break;
2781 case 3:
2782 tcg_gen_ld_i64(r, base, ofs);
2783 break;
2784 default:
2785 g_assert_not_reached();
2786 }
2787 return r;
2788}
2789
/* Load an unsigned element of ESZ from RM[LAST]. */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering. See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2815
/* Compute CLAST for a Zreg: broadcast the (conditionally) last active
 * element of Zm to all of Zd; if no element is active, Zd = Zn.
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Local temp: 'last' must survive the branch below.  */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        /* CLASTA: use the element after the last active one.  */
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2863
/* CLASTA/CLASTB (vectors).  */
static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}
2873
/* Compute CLAST for a scalar: replace REG_VAL with the (conditionally)
 * last active element of RM, or keep REG_VAL if no element is active.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage. We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
                        ele, reg_val);

    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2905
2906/* Compute CLAST for a Vreg. */
2907static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2908{
2909 if (sve_access_check(s)) {
2910 int esz = a->esz;
2911 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2912 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2913
2914 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2915 write_fp_dreg(s, a->rd, reg);
2916 tcg_temp_free_i64(reg);
2917 }
2918 return true;
2919}
2920
/* CLASTA/CLASTB (SIMD&FP scalar).  */
static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}
2930
/* Compute CLAST for a Xreg: the existing register value is first
 * zero-extended to the element size, as the merge source.
 */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        /* 64-bit elements need no extension.  */
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
2960
/* CLASTA/CLASTB (scalar).  */
static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, true);
}
2970
/* Compute LAST for a scalar: unconditionally load the selected element;
 * "not found" wraps to a valid element offset rather than merging.
 */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        /* LASTB: wrap not-found to the last element.  */
        wrap_last_active(s, last, esz);
    } else {
        /* LASTA: step to the element after the last active one.  */
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
2989
/* Compute LAST for a Vreg. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}
3000
/* LASTA/LASTB (SIMD&FP scalar).  */
static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, true);
}
3010
/* Compute LAST for a Xreg. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}
3021
/* LASTA/LASTB (scalar).  */
static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, true);
}
3031
/* CPY (scalar), merging: copy Xn/SP to active elements of Zd.  */
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/* CPY (SIMD&FP scalar), merging: copy element 0 of Vn to active elements.  */
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
3050
/* REVB: byte-reverse within each element; no byte-element form.  */
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* REVH: halfword-reverse within each element; word/dword only.  */
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* REVW: word-reverse within doubleword elements only.  */
static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}

/* RBIT: bit-reverse within each element.  */
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
3088
/* SPLICE: concatenate the active segment of Zn with leading elements
 * of Zm, under predicate control.
 */
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                          a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

/* SPLICE (SVE2 constructive form): second source is Zn+1 (mod 32).  */
static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                          a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
    }
    return true;
}
3109
757f9cff
RH
3110/*
3111 *** SVE Integer Compare - Vectors Group
3112 */
3113
/*
 * Expand an integer compare (two vectors) through an out-of-line helper
 * that produces both the predicate result and the NZCV flags value,
 * which is then folded into the emulated flags via do_pred_flags.
 * A NULL helper rejects the encoding (e.g. missing element size).
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
3152
/*
 * Instantiate a trans_*_ppzz function for each same-width compare,
 * selecting the flags-producing helper by element size.
 */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_flags_4 * const fns[4] = { \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
    }; \
    return do_ppzz_flags(s, a, fns[a->esz]); \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ
3171
/*
 * Instantiate a trans_*_ppzw function for each wide-element compare
 * (second operand is 64-bit); there is no doubleword form, hence the
 * NULL in the last slot.
 */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_flags_4 * const fns[4] = { \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
        gen_helper_sve_##name##_ppzw_s, NULL \
    }; \
    return do_ppzz_flags(s, a, fns[a->esz]); \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
3194
38cadeba
RH
3195/*
3196 *** SVE Integer Compare - Immediate Groups
3197 */
3198
/*
 * Expand an integer compare against an immediate, producing both the
 * predicate result and NZCV flags; the immediate travels in the desc
 * data field.  A NULL helper rejects the encoding.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
3234
/*
 * Instantiate a trans_*_ppzi function for each compare-with-immediate,
 * selecting the flags-producing helper by element size.
 */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
{ \
    static gen_helper_gvec_flags_3 * const fns[4] = { \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
    }; \
    return do_ppzi_flags(s, a, fns[a->esz]); \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
3257
35da316f
RH
3258/*
3259 *** SVE Partition Break Group
3260 */
3261
3262static bool do_brk3(DisasContext *s, arg_rprr_s *a,
3263 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
3264{
3265 if (!sve_access_check(s)) {
3266 return true;
3267 }
3268
3269 unsigned vsz = pred_full_reg_size(s);
3270
3271 /* Predicate sizes may be smaller and cannot use simd_desc. */
3272 TCGv_ptr d = tcg_temp_new_ptr();
3273 TCGv_ptr n = tcg_temp_new_ptr();
3274 TCGv_ptr m = tcg_temp_new_ptr();
3275 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 3276 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3277
3278 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3279 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3280 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
3281 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3282
3283 if (a->s) {
93418f1c
RH
3284 TCGv_i32 t = tcg_temp_new_i32();
3285 fn_s(t, d, n, m, g, desc);
35da316f 3286 do_pred_flags(t);
93418f1c 3287 tcg_temp_free_i32(t);
35da316f 3288 } else {
93418f1c 3289 fn(d, n, m, g, desc);
35da316f
RH
3290 }
3291 tcg_temp_free_ptr(d);
3292 tcg_temp_free_ptr(n);
3293 tcg_temp_free_ptr(m);
3294 tcg_temp_free_ptr(g);
35da316f
RH
3295 return true;
3296}
3297
3298static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3299 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3300{
3301 if (!sve_access_check(s)) {
3302 return true;
3303 }
3304
3305 unsigned vsz = pred_full_reg_size(s);
3306
3307 /* Predicate sizes may be smaller and cannot use simd_desc. */
3308 TCGv_ptr d = tcg_temp_new_ptr();
3309 TCGv_ptr n = tcg_temp_new_ptr();
3310 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 3311 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3312
3313 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3314 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3315 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3316
3317 if (a->s) {
93418f1c
RH
3318 TCGv_i32 t = tcg_temp_new_i32();
3319 fn_s(t, d, n, g, desc);
35da316f 3320 do_pred_flags(t);
93418f1c 3321 tcg_temp_free_i32(t);
35da316f 3322 } else {
93418f1c 3323 fn(d, n, g, desc);
35da316f
RH
3324 }
3325 tcg_temp_free_ptr(d);
3326 tcg_temp_free_ptr(n);
3327 tcg_temp_free_ptr(g);
35da316f
RH
3328 return true;
3329}
3330
3a7be554 3331static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
35da316f
RH
3332{
3333 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
3334}
3335
3a7be554 3336static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
35da316f
RH
3337{
3338 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
3339}
3340
3a7be554 3341static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3342{
3343 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
3344}
3345
3a7be554 3346static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3347{
3348 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
3349}
3350
3a7be554 3351static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3352{
3353 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3354}
3355
3a7be554 3356static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3357{
3358 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3359}
3360
3a7be554 3361static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3362{
3363 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3364}
3365
9ee3a611
RH
3366/*
3367 *** SVE Predicate Count Group
3368 */
3369
3370static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3371{
3372 unsigned psz = pred_full_reg_size(s);
3373
3374 if (psz <= 8) {
3375 uint64_t psz_mask;
3376
3377 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3378 if (pn != pg) {
3379 TCGv_i64 g = tcg_temp_new_i64();
3380 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3381 tcg_gen_and_i64(val, val, g);
3382 tcg_temp_free_i64(g);
3383 }
3384
3385 /* Reduce the pred_esz_masks value simply to reduce the
3386 * size of the code generated here.
3387 */
3388 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3389 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3390
3391 tcg_gen_ctpop_i64(val, val);
3392 } else {
3393 TCGv_ptr t_pn = tcg_temp_new_ptr();
3394 TCGv_ptr t_pg = tcg_temp_new_ptr();
f556a201 3395 unsigned desc = 0;
9ee3a611 3396
f556a201
RH
3397 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
3398 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
9ee3a611
RH
3399
3400 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3401 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
9ee3a611 3402
c6a59b55 3403 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
9ee3a611
RH
3404 tcg_temp_free_ptr(t_pn);
3405 tcg_temp_free_ptr(t_pg);
9ee3a611
RH
3406 }
3407}
3408
3a7be554 3409static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
3410{
3411 if (sve_access_check(s)) {
3412 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3413 }
3414 return true;
3415}
3416
3a7be554 3417static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3418{
3419 if (sve_access_check(s)) {
3420 TCGv_i64 reg = cpu_reg(s, a->rd);
3421 TCGv_i64 val = tcg_temp_new_i64();
3422
3423 do_cntp(s, val, a->esz, a->pg, a->pg);
3424 if (a->d) {
3425 tcg_gen_sub_i64(reg, reg, val);
3426 } else {
3427 tcg_gen_add_i64(reg, reg, val);
3428 }
3429 tcg_temp_free_i64(val);
3430 }
3431 return true;
3432}
3433
3a7be554 3434static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3435{
3436 if (a->esz == 0) {
3437 return false;
3438 }
3439 if (sve_access_check(s)) {
3440 unsigned vsz = vec_full_reg_size(s);
3441 TCGv_i64 val = tcg_temp_new_i64();
3442 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3443
3444 do_cntp(s, val, a->esz, a->pg, a->pg);
3445 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3446 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3447 }
3448 return true;
3449}
3450
3a7be554 3451static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3452{
3453 if (sve_access_check(s)) {
3454 TCGv_i64 reg = cpu_reg(s, a->rd);
3455 TCGv_i64 val = tcg_temp_new_i64();
3456
3457 do_cntp(s, val, a->esz, a->pg, a->pg);
3458 do_sat_addsub_32(reg, val, a->u, a->d);
3459 }
3460 return true;
3461}
3462
3a7be554 3463static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3464{
3465 if (sve_access_check(s)) {
3466 TCGv_i64 reg = cpu_reg(s, a->rd);
3467 TCGv_i64 val = tcg_temp_new_i64();
3468
3469 do_cntp(s, val, a->esz, a->pg, a->pg);
3470 do_sat_addsub_64(reg, val, a->u, a->d);
3471 }
3472 return true;
3473}
3474
3a7be554 3475static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3476{
3477 if (a->esz == 0) {
3478 return false;
3479 }
3480 if (sve_access_check(s)) {
3481 TCGv_i64 val = tcg_temp_new_i64();
3482 do_cntp(s, val, a->esz, a->pg, a->pg);
3483 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3484 }
3485 return true;
3486}
3487
caf1cefc
RH
3488/*
3489 *** SVE Integer Compare Scalars Group
3490 */
3491
3a7be554 3492static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3493{
3494 if (!sve_access_check(s)) {
3495 return true;
3496 }
3497
3498 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3499 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3500 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3501 TCGv_i64 cmp = tcg_temp_new_i64();
3502
3503 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3504 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3505 tcg_temp_free_i64(cmp);
3506
3507 /* VF = !NF & !CF. */
3508 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3509 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3510
3511 /* Both NF and VF actually look at bit 31. */
3512 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3513 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3514 return true;
3515}
3516
3a7be554 3517static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
caf1cefc 3518{
bbd0968c 3519 TCGv_i64 op0, op1, t0, t1, tmax;
4481bbf2 3520 TCGv_i32 t2;
caf1cefc 3521 TCGv_ptr ptr;
e610906c
RH
3522 unsigned vsz = vec_full_reg_size(s);
3523 unsigned desc = 0;
caf1cefc 3524 TCGCond cond;
34688dbc
RH
3525 uint64_t maxval;
3526 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
3527 bool eq = a->eq == a->lt;
caf1cefc 3528
34688dbc
RH
3529 /* The greater-than conditions are all SVE2. */
3530 if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
3531 return false;
3532 }
bbd0968c
RH
3533 if (!sve_access_check(s)) {
3534 return true;
3535 }
3536
3537 op0 = read_cpu_reg(s, a->rn, 1);
3538 op1 = read_cpu_reg(s, a->rm, 1);
3539
caf1cefc
RH
3540 if (!a->sf) {
3541 if (a->u) {
3542 tcg_gen_ext32u_i64(op0, op0);
3543 tcg_gen_ext32u_i64(op1, op1);
3544 } else {
3545 tcg_gen_ext32s_i64(op0, op0);
3546 tcg_gen_ext32s_i64(op1, op1);
3547 }
3548 }
3549
3550 /* For the helper, compress the different conditions into a computation
3551 * of how many iterations for which the condition is true.
caf1cefc 3552 */
bbd0968c
RH
3553 t0 = tcg_temp_new_i64();
3554 t1 = tcg_temp_new_i64();
34688dbc
RH
3555
3556 if (a->lt) {
3557 tcg_gen_sub_i64(t0, op1, op0);
3558 if (a->u) {
3559 maxval = a->sf ? UINT64_MAX : UINT32_MAX;
3560 cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
3561 } else {
3562 maxval = a->sf ? INT64_MAX : INT32_MAX;
3563 cond = eq ? TCG_COND_LE : TCG_COND_LT;
3564 }
3565 } else {
3566 tcg_gen_sub_i64(t0, op0, op1);
3567 if (a->u) {
3568 maxval = 0;
3569 cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
3570 } else {
3571 maxval = a->sf ? INT64_MIN : INT32_MIN;
3572 cond = eq ? TCG_COND_GE : TCG_COND_GT;
3573 }
3574 }
caf1cefc 3575
4481bbf2 3576 tmax = tcg_constant_i64(vsz >> a->esz);
34688dbc 3577 if (eq) {
caf1cefc
RH
3578 /* Equality means one more iteration. */
3579 tcg_gen_addi_i64(t0, t0, 1);
bbd0968c 3580
34688dbc
RH
3581 /*
3582 * For the less-than while, if op1 is maxval (and the only time
3583 * the addition above could overflow), then we produce an all-true
3584 * predicate by setting the count to the vector length. This is
3585 * because the pseudocode is described as an increment + compare
3586 * loop, and the maximum integer would always compare true.
3587 * Similarly, the greater-than while has the same issue with the
3588 * minimum integer due to the decrement + compare loop.
bbd0968c 3589 */
34688dbc 3590 tcg_gen_movi_i64(t1, maxval);
bbd0968c 3591 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
caf1cefc
RH
3592 }
3593
bbd0968c
RH
3594 /* Bound to the maximum. */
3595 tcg_gen_umin_i64(t0, t0, tmax);
bbd0968c
RH
3596
3597 /* Set the count to zero if the condition is false. */
caf1cefc
RH
3598 tcg_gen_movi_i64(t1, 0);
3599 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
bbd0968c 3600 tcg_temp_free_i64(t1);
caf1cefc 3601
bbd0968c 3602 /* Since we're bounded, pass as a 32-bit type. */
caf1cefc
RH
3603 t2 = tcg_temp_new_i32();
3604 tcg_gen_extrl_i64_i32(t2, t0);
3605 tcg_temp_free_i64(t0);
bbd0968c
RH
3606
3607 /* Scale elements to bits. */
3608 tcg_gen_shli_i32(t2, t2, a->esz);
caf1cefc 3609
e610906c
RH
3610 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3611 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
caf1cefc
RH
3612
3613 ptr = tcg_temp_new_ptr();
3614 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3615
34688dbc 3616 if (a->lt) {
4481bbf2 3617 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
34688dbc 3618 } else {
4481bbf2 3619 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
34688dbc 3620 }
caf1cefc
RH
3621 do_pred_flags(t2);
3622
3623 tcg_temp_free_ptr(ptr);
3624 tcg_temp_free_i32(t2);
caf1cefc
RH
3625 return true;
3626}
3627
14f6dad1
RH
3628static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3629{
3630 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3631 TCGv_i32 t2;
14f6dad1
RH
3632 TCGv_ptr ptr;
3633 unsigned vsz = vec_full_reg_size(s);
3634 unsigned desc = 0;
3635
3636 if (!dc_isar_feature(aa64_sve2, s)) {
3637 return false;
3638 }
3639 if (!sve_access_check(s)) {
3640 return true;
3641 }
3642
3643 op0 = read_cpu_reg(s, a->rn, 1);
3644 op1 = read_cpu_reg(s, a->rm, 1);
3645
4481bbf2 3646 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3647 diff = tcg_temp_new_i64();
3648
3649 if (a->rw) {
3650 /* WHILERW */
3651 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3652 t1 = tcg_temp_new_i64();
3653 tcg_gen_sub_i64(diff, op0, op1);
3654 tcg_gen_sub_i64(t1, op1, op0);
3655 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3656 tcg_temp_free_i64(t1);
3657 /* Round down to a multiple of ESIZE. */
3658 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3659 /* If op1 == op0, diff == 0, and the condition is always true. */
3660 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3661 } else {
3662 /* WHILEWR */
3663 tcg_gen_sub_i64(diff, op1, op0);
3664 /* Round down to a multiple of ESIZE. */
3665 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3666 /* If op0 >= op1, diff <= 0, the condition is always true. */
3667 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3668 }
3669
3670 /* Bound to the maximum. */
3671 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3672
3673 /* Since we're bounded, pass as a 32-bit type. */
3674 t2 = tcg_temp_new_i32();
3675 tcg_gen_extrl_i64_i32(t2, diff);
3676 tcg_temp_free_i64(diff);
3677
3678 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3679 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3680
3681 ptr = tcg_temp_new_ptr();
3682 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3683
4481bbf2 3684 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3685 do_pred_flags(t2);
3686
3687 tcg_temp_free_ptr(ptr);
3688 tcg_temp_free_i32(t2);
14f6dad1
RH
3689 return true;
3690}
3691
ed491961
RH
3692/*
3693 *** SVE Integer Wide Immediate - Unpredicated Group
3694 */
3695
3a7be554 3696static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3697{
3698 if (a->esz == 0) {
3699 return false;
3700 }
3701 if (sve_access_check(s)) {
3702 unsigned vsz = vec_full_reg_size(s);
3703 int dofs = vec_full_reg_offset(s, a->rd);
3704 uint64_t imm;
3705
3706 /* Decode the VFP immediate. */
3707 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3708 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3709 }
3710 return true;
3711}
3712
3a7be554 3713static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3714{
3a7be554 3715 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
ed491961
RH
3716 return false;
3717 }
3718 if (sve_access_check(s)) {
3719 unsigned vsz = vec_full_reg_size(s);
3720 int dofs = vec_full_reg_offset(s, a->rd);
3721
8711e71f 3722 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3723 }
3724 return true;
3725}
3726
3a7be554 3727static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3728{
3a7be554 3729 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3730 return false;
3731 }
3732 if (sve_access_check(s)) {
3733 unsigned vsz = vec_full_reg_size(s);
3734 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3735 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3736 }
3737 return true;
3738}
3739
3a7be554 3740static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3741{
3742 a->imm = -a->imm;
3a7be554 3743 return trans_ADD_zzi(s, a);
6e6a157d
RH
3744}
3745
3a7be554 3746static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3747{
53229a77 3748 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
6e6a157d
RH
3749 static const GVecGen2s op[4] = {
3750 { .fni8 = tcg_gen_vec_sub8_i64,
3751 .fniv = tcg_gen_sub_vec,
3752 .fno = gen_helper_sve_subri_b,
53229a77 3753 .opt_opc = vecop_list,
6e6a157d
RH
3754 .vece = MO_8,
3755 .scalar_first = true },
3756 { .fni8 = tcg_gen_vec_sub16_i64,
3757 .fniv = tcg_gen_sub_vec,
3758 .fno = gen_helper_sve_subri_h,
53229a77 3759 .opt_opc = vecop_list,
6e6a157d
RH
3760 .vece = MO_16,
3761 .scalar_first = true },
3762 { .fni4 = tcg_gen_sub_i32,
3763 .fniv = tcg_gen_sub_vec,
3764 .fno = gen_helper_sve_subri_s,
53229a77 3765 .opt_opc = vecop_list,
6e6a157d
RH
3766 .vece = MO_32,
3767 .scalar_first = true },
3768 { .fni8 = tcg_gen_sub_i64,
3769 .fniv = tcg_gen_sub_vec,
3770 .fno = gen_helper_sve_subri_d,
53229a77 3771 .opt_opc = vecop_list,
6e6a157d
RH
3772 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3773 .vece = MO_64,
3774 .scalar_first = true }
3775 };
3776
3a7be554 3777 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3778 return false;
3779 }
3780 if (sve_access_check(s)) {
3781 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3782 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3783 vec_full_reg_offset(s, a->rn),
9fff3fcc 3784 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
6e6a157d
RH
3785 }
3786 return true;
3787}
3788
3a7be554 3789static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3790{
3791 if (sve_access_check(s)) {
3792 unsigned vsz = vec_full_reg_size(s);
3793 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3794 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3795 }
3796 return true;
3797}
3798
3a7be554 3799static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3800{
3a7be554 3801 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3802 return false;
3803 }
3804 if (sve_access_check(s)) {
138a1f7b
RH
3805 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3806 tcg_constant_i64(a->imm), u, d);
6e6a157d
RH
3807 }
3808 return true;
3809}
3810
3a7be554 3811static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3812{
3a7be554 3813 return do_zzi_sat(s, a, false, false);
6e6a157d
RH
3814}
3815
3a7be554 3816static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3817{
3a7be554 3818 return do_zzi_sat(s, a, true, false);
6e6a157d
RH
3819}
3820
3a7be554 3821static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3822{
3a7be554 3823 return do_zzi_sat(s, a, false, true);
6e6a157d
RH
3824}
3825
3a7be554 3826static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3827{
3a7be554 3828 return do_zzi_sat(s, a, true, true);
6e6a157d
RH
3829}
3830
3831static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3832{
3833 if (sve_access_check(s)) {
3834 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3835 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3836 vec_full_reg_offset(s, a->rn),
138a1f7b 3837 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
6e6a157d
RH
3838 }
3839 return true;
3840}
3841
3842#define DO_ZZI(NAME, name) \
3a7be554 3843static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
6e6a157d
RH
3844{ \
3845 static gen_helper_gvec_2i * const fns[4] = { \
3846 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3847 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3848 }; \
3849 return do_zzi_ool(s, a, fns[a->esz]); \
3850}
3851
3852DO_ZZI(SMAX, smax)
3853DO_ZZI(UMAX, umax)
3854DO_ZZI(SMIN, smin)
3855DO_ZZI(UMIN, umin)
3856
3857#undef DO_ZZI
3858
bc2bd697 3859static bool trans_DOT_zzzz(DisasContext *s, arg_DOT_zzzz *a)
d730ecaa 3860{
bc2bd697 3861 static gen_helper_gvec_4 * const fns[2][2] = {
d730ecaa
RH
3862 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3863 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3864 };
3865
3866 if (sve_access_check(s)) {
bc2bd697 3867 gen_gvec_ool_zzzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0);
d730ecaa
RH
3868 }
3869 return true;
3870}
3871
814d4c52
RH
3872/*
3873 * SVE Multiply - Indexed
3874 */
3875
0a82d963
RH
3876static bool do_zzxz_ool(DisasContext *s, arg_rrxr_esz *a,
3877 gen_helper_gvec_4 *fn)
16fcfdc7 3878{
0a82d963
RH
3879 if (fn == NULL) {
3880 return false;
3881 }
16fcfdc7 3882 if (sve_access_check(s)) {
0a82d963 3883 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
16fcfdc7
RH
3884 }
3885 return true;
3886}
3887
0a82d963
RH
3888#define DO_RRXR(NAME, FUNC) \
3889 static bool NAME(DisasContext *s, arg_rrxr_esz *a) \
3890 { return do_zzxz_ool(s, a, FUNC); }
3891
3892DO_RRXR(trans_SDOT_zzxw_s, gen_helper_gvec_sdot_idx_b)
3893DO_RRXR(trans_SDOT_zzxw_d, gen_helper_gvec_sdot_idx_h)
3894DO_RRXR(trans_UDOT_zzxw_s, gen_helper_gvec_udot_idx_b)
3895DO_RRXR(trans_UDOT_zzxw_d, gen_helper_gvec_udot_idx_h)
3896
2867039a
RH
3897static bool trans_SUDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
3898{
3899 if (!dc_isar_feature(aa64_sve_i8mm, s)) {
3900 return false;
3901 }
3902 return do_zzxz_ool(s, a, gen_helper_gvec_sudot_idx_b);
3903}
3904
3905static bool trans_USDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
3906{
3907 if (!dc_isar_feature(aa64_sve_i8mm, s)) {
3908 return false;
3909 }
3910 return do_zzxz_ool(s, a, gen_helper_gvec_usdot_idx_b);
3911}
3912
0a82d963 3913#undef DO_RRXR
16fcfdc7 3914
814d4c52
RH
3915static bool do_sve2_zzz_data(DisasContext *s, int rd, int rn, int rm, int data,
3916 gen_helper_gvec_3 *fn)
3917{
3918 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
3919 return false;
3920 }
3921 if (sve_access_check(s)) {
3922 unsigned vsz = vec_full_reg_size(s);
3923 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
3924 vec_full_reg_offset(s, rn),
3925 vec_full_reg_offset(s, rm),
3926 vsz, vsz, data, fn);
3927 }
3928 return true;
3929}
3930
3931#define DO_SVE2_RRX(NAME, FUNC) \
3932 static bool NAME(DisasContext *s, arg_rrx_esz *a) \
3933 { return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, a->index, FUNC); }
3934
3935DO_SVE2_RRX(trans_MUL_zzx_h, gen_helper_gvec_mul_idx_h)
3936DO_SVE2_RRX(trans_MUL_zzx_s, gen_helper_gvec_mul_idx_s)
3937DO_SVE2_RRX(trans_MUL_zzx_d, gen_helper_gvec_mul_idx_d)
3938
1aee2d70
RH
3939DO_SVE2_RRX(trans_SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
3940DO_SVE2_RRX(trans_SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
3941DO_SVE2_RRX(trans_SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
3942
3943DO_SVE2_RRX(trans_SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
3944DO_SVE2_RRX(trans_SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
3945DO_SVE2_RRX(trans_SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
3946
814d4c52
RH
3947#undef DO_SVE2_RRX
3948
b95f5eeb
RH
3949#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
3950 static bool NAME(DisasContext *s, arg_rrx_esz *a) \
3951 { \
3952 return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, \
3953 (a->index << 1) | TOP, FUNC); \
3954 }
3955
3956DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
3957DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
3958DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
3959DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
3960
d3949c4c
RH
3961DO_SVE2_RRX_TB(trans_SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
3962DO_SVE2_RRX_TB(trans_SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
3963DO_SVE2_RRX_TB(trans_SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
3964DO_SVE2_RRX_TB(trans_SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
3965
3966DO_SVE2_RRX_TB(trans_UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
3967DO_SVE2_RRX_TB(trans_UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
3968DO_SVE2_RRX_TB(trans_UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
3969DO_SVE2_RRX_TB(trans_UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
3970
b95f5eeb
RH
3971#undef DO_SVE2_RRX_TB
3972
8a02aac7
RH
3973static bool do_sve2_zzzz_data(DisasContext *s, int rd, int rn, int rm, int ra,
3974 int data, gen_helper_gvec_4 *fn)
3975{
3976 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
3977 return false;
3978 }
3979 if (sve_access_check(s)) {
3980 unsigned vsz = vec_full_reg_size(s);
3981 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
3982 vec_full_reg_offset(s, rn),
3983 vec_full_reg_offset(s, rm),
3984 vec_full_reg_offset(s, ra),
3985 vsz, vsz, data, fn);
3986 }
3987 return true;
3988}
3989
3990#define DO_SVE2_RRXR(NAME, FUNC) \
3991 static bool NAME(DisasContext *s, arg_rrxr_esz *a) \
3992 { return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, a->index, FUNC); }
3993
3994DO_SVE2_RRXR(trans_MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
3995DO_SVE2_RRXR(trans_MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
3996DO_SVE2_RRXR(trans_MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
3997
3998DO_SVE2_RRXR(trans_MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
3999DO_SVE2_RRXR(trans_MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
4000DO_SVE2_RRXR(trans_MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
4001
75d6d5fc
RH
4002DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
4003DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
4004DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
4005
4006DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
4007DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
4008DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
4009
8a02aac7
RH
4010#undef DO_SVE2_RRXR
4011
c5c455d7
RH
4012#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
4013 static bool NAME(DisasContext *s, arg_rrxr_esz *a) \
4014 { \
4015 return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->rd, \
4016 (a->index << 1) | TOP, FUNC); \
4017 }
4018
4019DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
4020DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
4021DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
4022DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
4023
4024DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
4025DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
4026DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
4027DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
d462469f
RH
4028
4029DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
4030DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
4031DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
4032DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
4033
4034DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
4035DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
4036DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
4037DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
4038
4039DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
4040DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
4041DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
4042DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
4043
4044DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
4045DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
4046DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
4047DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
c5c455d7
RH
4048
4049#undef DO_SVE2_RRXR_TB
4050
3b787ed8
RH
4051#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
4052 static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
4053 { \
4054 return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, \
4055 (a->index << 2) | a->rot, FUNC); \
4056 }
4057
4058DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
4059DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)
4060
4061DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
4062DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)
4063
21068f39
RH
4064DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
4065DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
4066
3b787ed8
RH
4067#undef DO_SVE2_RRXR_ROT
4068
ca40a6e6
RH
4069/*
4070 *** SVE Floating Point Multiply-Add Indexed Group
4071 */
4072
0a82d963 4073static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
ca40a6e6
RH
4074{
4075 static gen_helper_gvec_4_ptr * const fns[3] = {
4076 gen_helper_gvec_fmla_idx_h,
4077 gen_helper_gvec_fmla_idx_s,
4078 gen_helper_gvec_fmla_idx_d,
4079 };
4080
4081 if (sve_access_check(s)) {
4082 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4083 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
4084 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4085 vec_full_reg_offset(s, a->rn),
4086 vec_full_reg_offset(s, a->rm),
4087 vec_full_reg_offset(s, a->ra),
0a82d963 4088 status, vsz, vsz, (a->index << 1) | sub,
ca40a6e6
RH
4089 fns[a->esz - 1]);
4090 tcg_temp_free_ptr(status);
4091 }
4092 return true;
4093}
4094
0a82d963
RH
4095static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
4096{
4097 return do_FMLA_zzxz(s, a, false);
4098}
4099
4100static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
4101{
4102 return do_FMLA_zzxz(s, a, true);
4103}
4104
ca40a6e6
RH
4105/*
4106 *** SVE Floating Point Multiply Indexed Group
4107 */
4108
3a7be554 4109static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
ca40a6e6
RH
4110{
4111 static gen_helper_gvec_3_ptr * const fns[3] = {
4112 gen_helper_gvec_fmul_idx_h,
4113 gen_helper_gvec_fmul_idx_s,
4114 gen_helper_gvec_fmul_idx_d,
4115 };
4116
4117 if (sve_access_check(s)) {
4118 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4119 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
4120 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4121 vec_full_reg_offset(s, a->rn),
4122 vec_full_reg_offset(s, a->rm),
4123 status, vsz, vsz, a->index, fns[a->esz - 1]);
4124 tcg_temp_free_ptr(status);
4125 }
4126 return true;
4127}
4128
23fbe79f
RH
4129/*
4130 *** SVE Floating Point Fast Reduction Group
4131 */
4132
4133typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
4134 TCGv_ptr, TCGv_i32);
4135
4136static void do_reduce(DisasContext *s, arg_rpr_esz *a,
4137 gen_helper_fp_reduce *fn)
4138{
4139 unsigned vsz = vec_full_reg_size(s);
4140 unsigned p2vsz = pow2ceil(vsz);
c6a59b55 4141 TCGv_i32 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
23fbe79f
RH
4142 TCGv_ptr t_zn, t_pg, status;
4143 TCGv_i64 temp;
4144
4145 temp = tcg_temp_new_i64();
4146 t_zn = tcg_temp_new_ptr();
4147 t_pg = tcg_temp_new_ptr();
4148
4149 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
4150 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 4151 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
23fbe79f
RH
4152
4153 fn(temp, t_zn, t_pg, status, t_desc);
4154 tcg_temp_free_ptr(t_zn);
4155 tcg_temp_free_ptr(t_pg);
4156 tcg_temp_free_ptr(status);
23fbe79f
RH
4157
4158 write_fp_dreg(s, a->rd, temp);
4159 tcg_temp_free_i64(temp);
4160}
4161
4162#define DO_VPZ(NAME, name) \
3a7be554 4163static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
23fbe79f
RH
4164{ \
4165 static gen_helper_fp_reduce * const fns[3] = { \
4166 gen_helper_sve_##name##_h, \
4167 gen_helper_sve_##name##_s, \
4168 gen_helper_sve_##name##_d, \
4169 }; \
4170 if (a->esz == 0) { \
4171 return false; \
4172 } \
4173 if (sve_access_check(s)) { \
4174 do_reduce(s, a, fns[a->esz - 1]); \
4175 } \
4176 return true; \
4177}
4178
4179DO_VPZ(FADDV, faddv)
4180DO_VPZ(FMINNMV, fminnmv)
4181DO_VPZ(FMAXNMV, fmaxnmv)
4182DO_VPZ(FMINV, fminv)
4183DO_VPZ(FMAXV, fmaxv)
4184
3887c038
RH
4185/*
4186 *** SVE Floating Point Unary Operations - Unpredicated Group
4187 */
4188
4189static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
4190{
4191 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4192 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
4193
4194 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4195 vec_full_reg_offset(s, a->rn),
4196 status, vsz, vsz, 0, fn);
4197 tcg_temp_free_ptr(status);
4198}
4199
3a7be554 4200static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
4201{
4202 static gen_helper_gvec_2_ptr * const fns[3] = {
4203 gen_helper_gvec_frecpe_h,
4204 gen_helper_gvec_frecpe_s,
4205 gen_helper_gvec_frecpe_d,
4206 };
4207 if (a->esz == 0) {
4208 return false;
4209 }
4210 if (sve_access_check(s)) {
4211 do_zz_fp(s, a, fns[a->esz - 1]);
4212 }
4213 return true;
4214}
4215
3a7be554 4216static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
4217{
4218 static gen_helper_gvec_2_ptr * const fns[3] = {
4219 gen_helper_gvec_frsqrte_h,
4220 gen_helper_gvec_frsqrte_s,
4221 gen_helper_gvec_frsqrte_d,
4222 };
4223 if (a->esz == 0) {
4224 return false;
4225 }
4226 if (sve_access_check(s)) {
4227 do_zz_fp(s, a, fns[a->esz - 1]);
4228 }
4229 return true;
4230}
4231
4d2e2a03
RH
4232/*
4233 *** SVE Floating Point Compare with Zero Group
4234 */
4235
4236static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4237 gen_helper_gvec_3_ptr *fn)
4238{
4239 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4240 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
4241
4242 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
4243 vec_full_reg_offset(s, a->rn),
4244 pred_full_reg_offset(s, a->pg),
4245 status, vsz, vsz, 0, fn);
4246 tcg_temp_free_ptr(status);
4247}
4248
4249#define DO_PPZ(NAME, name) \
3a7be554 4250static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
4d2e2a03
RH
4251{ \
4252 static gen_helper_gvec_3_ptr * const fns[3] = { \
4253 gen_helper_sve_##name##_h, \
4254 gen_helper_sve_##name##_s, \
4255 gen_helper_sve_##name##_d, \
4256 }; \
4257 if (a->esz == 0) { \
4258 return false; \
4259 } \
4260 if (sve_access_check(s)) { \
4261 do_ppz_fp(s, a, fns[a->esz - 1]); \
4262 } \
4263 return true; \
4264}
4265
4266DO_PPZ(FCMGE_ppz0, fcmge0)
4267DO_PPZ(FCMGT_ppz0, fcmgt0)
4268DO_PPZ(FCMLE_ppz0, fcmle0)
4269DO_PPZ(FCMLT_ppz0, fcmlt0)
4270DO_PPZ(FCMEQ_ppz0, fcmeq0)
4271DO_PPZ(FCMNE_ppz0, fcmne0)
4272
4273#undef DO_PPZ
4274
67fcd9ad
RH
4275/*
4276 *** SVE floating-point trig multiply-add coefficient
4277 */
4278
3a7be554 4279static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
4280{
4281 static gen_helper_gvec_3_ptr * const fns[3] = {
4282 gen_helper_sve_ftmad_h,
4283 gen_helper_sve_ftmad_s,
4284 gen_helper_sve_ftmad_d,
4285 };
4286
4287 if (a->esz == 0) {
4288 return false;
4289 }
4290 if (sve_access_check(s)) {
4291 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4292 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
4293 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4294 vec_full_reg_offset(s, a->rn),
4295 vec_full_reg_offset(s, a->rm),
4296 status, vsz, vsz, a->imm, fns[a->esz - 1]);
4297 tcg_temp_free_ptr(status);
4298 }
4299 return true;
4300}
4301
7f9ddf64
RH
4302/*
4303 *** SVE Floating Point Accumulating Reduction Group
4304 */
4305
3a7be554 4306static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
4307{
4308 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
4309 TCGv_ptr, TCGv_ptr, TCGv_i32);
4310 static fadda_fn * const fns[3] = {
4311 gen_helper_sve_fadda_h,
4312 gen_helper_sve_fadda_s,
4313 gen_helper_sve_fadda_d,
4314 };
4315 unsigned vsz = vec_full_reg_size(s);
4316 TCGv_ptr t_rm, t_pg, t_fpst;
4317 TCGv_i64 t_val;
4318 TCGv_i32 t_desc;
4319
4320 if (a->esz == 0) {
4321 return false;
4322 }
4323 if (!sve_access_check(s)) {
4324 return true;
4325 }
4326
4327 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
4328 t_rm = tcg_temp_new_ptr();
4329 t_pg = tcg_temp_new_ptr();
4330 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
4331 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 4332 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
c6a59b55 4333 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
7f9ddf64
RH
4334
4335 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
4336
7f9ddf64
RH
4337 tcg_temp_free_ptr(t_fpst);
4338 tcg_temp_free_ptr(t_pg);
4339 tcg_temp_free_ptr(t_rm);
4340
4341 write_fp_dreg(s, a->rd, t_val);
4342 tcg_temp_free_i64(t_val);
4343 return true;
4344}
4345
29b80469
RH
4346/*
4347 *** SVE Floating Point Arithmetic - Unpredicated Group
4348 */
4349
4350static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
4351 gen_helper_gvec_3_ptr *fn)
4352{
4353 if (fn == NULL) {
4354 return false;
4355 }
4356 if (sve_access_check(s)) {
4357 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4358 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
4359 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4360 vec_full_reg_offset(s, a->rn),
4361 vec_full_reg_offset(s, a->rm),
4362 status, vsz, vsz, 0, fn);
4363 tcg_temp_free_ptr(status);
4364 }
4365 return true;
4366}
4367
4368
4369#define DO_FP3(NAME, name) \
3a7be554 4370static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
4371{ \
4372 static gen_helper_gvec_3_ptr * const fns[4] = { \
4373 NULL, gen_helper_gvec_##name##_h, \
4374 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
4375 }; \
4376 return do_zzz_fp(s, a, fns[a->esz]); \
4377}
4378
4379DO_FP3(FADD_zzz, fadd)
4380DO_FP3(FSUB_zzz, fsub)
4381DO_FP3(FMUL_zzz, fmul)
4382DO_FP3(FTSMUL, ftsmul)
4383DO_FP3(FRECPS, recps)
4384DO_FP3(FRSQRTS, rsqrts)
4385
4386#undef DO_FP3
4387
ec3b87c2
RH
4388/*
4389 *** SVE Floating Point Arithmetic - Predicated Group
4390 */
4391
4392static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
4393 gen_helper_gvec_4_ptr *fn)
4394{
4395 if (fn == NULL) {
4396 return false;
4397 }
4398 if (sve_access_check(s)) {
4399 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4400 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
4401 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4402 vec_full_reg_offset(s, a->rn),
4403 vec_full_reg_offset(s, a->rm),
4404 pred_full_reg_offset(s, a->pg),
4405 status, vsz, vsz, 0, fn);
4406 tcg_temp_free_ptr(status);
4407 }
4408 return true;
4409}
4410
4411#define DO_FP3(NAME, name) \
3a7be554 4412static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
4413{ \
4414 static gen_helper_gvec_4_ptr * const fns[4] = { \
4415 NULL, gen_helper_sve_##name##_h, \
4416 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4417 }; \
4418 return do_zpzz_fp(s, a, fns[a->esz]); \
4419}
4420
4421DO_FP3(FADD_zpzz, fadd)
4422DO_FP3(FSUB_zpzz, fsub)
4423DO_FP3(FMUL_zpzz, fmul)
4424DO_FP3(FMIN_zpzz, fmin)
4425DO_FP3(FMAX_zpzz, fmax)
4426DO_FP3(FMINNM_zpzz, fminnum)
4427DO_FP3(FMAXNM_zpzz, fmaxnum)
4428DO_FP3(FABD, fabd)
4429DO_FP3(FSCALE, fscalbn)
4430DO_FP3(FDIV, fdiv)
4431DO_FP3(FMULX, fmulx)
4432
4433#undef DO_FP3
8092c6a3 4434
cc48affe
RH
4435typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
4436 TCGv_i64, TCGv_ptr, TCGv_i32);
4437
4438static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
4439 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
4440{
4441 unsigned vsz = vec_full_reg_size(s);
4442 TCGv_ptr t_zd, t_zn, t_pg, status;
4443 TCGv_i32 desc;
4444
4445 t_zd = tcg_temp_new_ptr();
4446 t_zn = tcg_temp_new_ptr();
4447 t_pg = tcg_temp_new_ptr();
4448 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
4449 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
4450 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4451
cdfb22bb 4452 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
c6a59b55 4453 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
cc48affe
RH
4454 fn(t_zd, t_zn, t_pg, scalar, status, desc);
4455
cc48affe
RH
4456 tcg_temp_free_ptr(status);
4457 tcg_temp_free_ptr(t_pg);
4458 tcg_temp_free_ptr(t_zn);
4459 tcg_temp_free_ptr(t_zd);
4460}
4461
4462static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4463 gen_helper_sve_fp2scalar *fn)
4464{
138a1f7b
RH
4465 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4466 tcg_constant_i64(imm), fn);
cc48affe
RH
4467}
4468
4469#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 4470static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
4471{ \
4472 static gen_helper_sve_fp2scalar * const fns[3] = { \
4473 gen_helper_sve_##name##_h, \
4474 gen_helper_sve_##name##_s, \
4475 gen_helper_sve_##name##_d \
4476 }; \
4477 static uint64_t const val[3][2] = { \
4478 { float16_##const0, float16_##const1 }, \
4479 { float32_##const0, float32_##const1 }, \
4480 { float64_##const0, float64_##const1 }, \
4481 }; \
4482 if (a->esz == 0) { \
4483 return false; \
4484 } \
4485 if (sve_access_check(s)) { \
4486 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
4487 } \
4488 return true; \
4489}
4490
cc48affe
RH
4491DO_FP_IMM(FADD, fadds, half, one)
4492DO_FP_IMM(FSUB, fsubs, half, one)
4493DO_FP_IMM(FMUL, fmuls, half, two)
4494DO_FP_IMM(FSUBR, fsubrs, half, one)
4495DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
4496DO_FP_IMM(FMINNM, fminnms, zero, one)
4497DO_FP_IMM(FMAX, fmaxs, zero, one)
4498DO_FP_IMM(FMIN, fmins, zero, one)
4499
4500#undef DO_FP_IMM
4501
abfdefd5
RH
4502static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4503 gen_helper_gvec_4_ptr *fn)
4504{
4505 if (fn == NULL) {
4506 return false;
4507 }
4508 if (sve_access_check(s)) {
4509 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4510 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4511 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4512 vec_full_reg_offset(s, a->rn),
4513 vec_full_reg_offset(s, a->rm),
4514 pred_full_reg_offset(s, a->pg),
4515 status, vsz, vsz, 0, fn);
4516 tcg_temp_free_ptr(status);
4517 }
4518 return true;
4519}
4520
4521#define DO_FPCMP(NAME, name) \
3a7be554 4522static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
4523{ \
4524 static gen_helper_gvec_4_ptr * const fns[4] = { \
4525 NULL, gen_helper_sve_##name##_h, \
4526 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4527 }; \
4528 return do_fp_cmp(s, a, fns[a->esz]); \
4529}
4530
4531DO_FPCMP(FCMGE, fcmge)
4532DO_FPCMP(FCMGT, fcmgt)
4533DO_FPCMP(FCMEQ, fcmeq)
4534DO_FPCMP(FCMNE, fcmne)
4535DO_FPCMP(FCMUO, fcmuo)
4536DO_FPCMP(FACGE, facge)
4537DO_FPCMP(FACGT, facgt)
4538
4539#undef DO_FPCMP
4540
3a7be554 4541static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
4542{
4543 static gen_helper_gvec_4_ptr * const fns[3] = {
4544 gen_helper_sve_fcadd_h,
4545 gen_helper_sve_fcadd_s,
4546 gen_helper_sve_fcadd_d
4547 };
4548
4549 if (a->esz == 0) {
4550 return false;
4551 }
4552 if (sve_access_check(s)) {
4553 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4554 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
4555 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4556 vec_full_reg_offset(s, a->rn),
4557 vec_full_reg_offset(s, a->rm),
4558 pred_full_reg_offset(s, a->pg),
4559 status, vsz, vsz, a->rot, fns[a->esz - 1]);
4560 tcg_temp_free_ptr(status);
4561 }
4562 return true;
4563}
4564
08975da9
RH
4565static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
4566 gen_helper_gvec_5_ptr *fn)
6ceabaad 4567{
08975da9 4568 if (a->esz == 0) {
6ceabaad
RH
4569 return false;
4570 }
08975da9
RH
4571 if (sve_access_check(s)) {
4572 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4573 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4574 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4575 vec_full_reg_offset(s, a->rn),
4576 vec_full_reg_offset(s, a->rm),
4577 vec_full_reg_offset(s, a->ra),
4578 pred_full_reg_offset(s, a->pg),
4579 status, vsz, vsz, 0, fn);
4580 tcg_temp_free_ptr(status);
6ceabaad 4581 }
6ceabaad
RH
4582 return true;
4583}
4584
4585#define DO_FMLA(NAME, name) \
3a7be554 4586static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 4587{ \
08975da9 4588 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
4589 NULL, gen_helper_sve_##name##_h, \
4590 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4591 }; \
4592 return do_fmla(s, a, fns[a->esz]); \
4593}
4594
4595DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4596DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4597DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4598DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4599
4600#undef DO_FMLA
4601
3a7be554 4602static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 4603{
08975da9
RH
4604 static gen_helper_gvec_5_ptr * const fns[4] = {
4605 NULL,
05f48bab
RH
4606 gen_helper_sve_fcmla_zpzzz_h,
4607 gen_helper_sve_fcmla_zpzzz_s,
4608 gen_helper_sve_fcmla_zpzzz_d,
4609 };
4610
4611 if (a->esz == 0) {
4612 return false;
4613 }
4614 if (sve_access_check(s)) {
4615 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4616 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4617 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4618 vec_full_reg_offset(s, a->rn),
4619 vec_full_reg_offset(s, a->rm),
4620 vec_full_reg_offset(s, a->ra),
4621 pred_full_reg_offset(s, a->pg),
4622 status, vsz, vsz, a->rot, fns[a->esz]);
4623 tcg_temp_free_ptr(status);
05f48bab
RH
4624 }
4625 return true;
4626}
4627
3a7be554 4628static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405 4629{
636ddeb1 4630 static gen_helper_gvec_4_ptr * const fns[2] = {
18fc2405
RH
4631 gen_helper_gvec_fcmlah_idx,
4632 gen_helper_gvec_fcmlas_idx,
4633 };
4634
4635 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4636 tcg_debug_assert(a->rd == a->ra);
4637 if (sve_access_check(s)) {
4638 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4639 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
636ddeb1 4640 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
18fc2405
RH
4641 vec_full_reg_offset(s, a->rn),
4642 vec_full_reg_offset(s, a->rm),
636ddeb1 4643 vec_full_reg_offset(s, a->ra),
18fc2405
RH
4644 status, vsz, vsz,
4645 a->index * 4 + a->rot,
4646 fns[a->esz - 1]);
4647 tcg_temp_free_ptr(status);
4648 }
4649 return true;
4650}
4651
8092c6a3
RH
4652/*
4653 *** SVE Floating Point Unary Operations Predicated Group
4654 */
4655
4656static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4657 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4658{
4659 if (sve_access_check(s)) {
4660 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4661 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
4662 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4663 vec_full_reg_offset(s, rn),
4664 pred_full_reg_offset(s, pg),
4665 status, vsz, vsz, 0, fn);
4666 tcg_temp_free_ptr(status);
4667 }
4668 return true;
4669}
4670
3a7be554 4671static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4672{
e4ab5124 4673 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
4674}
4675
3a7be554 4676static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4677{
4678 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4679}
4680
d29b17ca
RH
4681static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
4682{
4683 if (!dc_isar_feature(aa64_sve_bf16, s)) {
4684 return false;
4685 }
4686 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
4687}
4688
3a7be554 4689static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4690{
e4ab5124 4691 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
4692}
4693
3a7be554 4694static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4695{
4696 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4697}
4698
3a7be554 4699static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4700{
4701 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4702}
4703
3a7be554 4704static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4705{
4706 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4707}
4708
3a7be554 4709static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4710{
4711 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4712}
4713
3a7be554 4714static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4715{
4716 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4717}
4718
3a7be554 4719static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4720{
4721 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4722}
4723
3a7be554 4724static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4725{
4726 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4727}
4728
3a7be554 4729static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4730{
4731 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4732}
4733
3a7be554 4734static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4735{
4736 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4737}
4738
3a7be554 4739static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4740{
4741 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4742}
4743
3a7be554 4744static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4745{
4746 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4747}
4748
3a7be554 4749static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4750{
4751 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4752}
4753
3a7be554 4754static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4755{
4756 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4757}
4758
3a7be554 4759static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4760{
4761 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4762}
4763
3a7be554 4764static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4765{
4766 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4767}
4768
3a7be554 4769static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4770{
4771 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4772}
4773
3a7be554 4774static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4775{
4776 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4777}
4778
cda3c753
RH
4779static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4780 gen_helper_sve_frint_h,
4781 gen_helper_sve_frint_s,
4782 gen_helper_sve_frint_d
4783};
4784
3a7be554 4785static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4786{
4787 if (a->esz == 0) {
4788 return false;
4789 }
4790 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4791 frint_fns[a->esz - 1]);
4792}
4793
3a7be554 4794static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4795{
4796 static gen_helper_gvec_3_ptr * const fns[3] = {
4797 gen_helper_sve_frintx_h,
4798 gen_helper_sve_frintx_s,
4799 gen_helper_sve_frintx_d
4800 };
4801 if (a->esz == 0) {
4802 return false;
4803 }
4804 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4805}
4806
95365277
SL
4807static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4808 int mode, gen_helper_gvec_3_ptr *fn)
cda3c753 4809{
cda3c753
RH
4810 if (sve_access_check(s)) {
4811 unsigned vsz = vec_full_reg_size(s);
4812 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4813 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4814
4815 gen_helper_set_rmode(tmode, tmode, status);
4816
4817 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4818 vec_full_reg_offset(s, a->rn),
4819 pred_full_reg_offset(s, a->pg),
95365277 4820 status, vsz, vsz, 0, fn);
cda3c753
RH
4821
4822 gen_helper_set_rmode(tmode, tmode, status);
4823 tcg_temp_free_i32(tmode);
4824 tcg_temp_free_ptr(status);
4825 }
4826 return true;
4827}
4828
3a7be554 4829static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753 4830{
95365277
SL
4831 if (a->esz == 0) {
4832 return false;
4833 }
4834 return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
cda3c753
RH
4835}
4836
3a7be554 4837static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753 4838{
95365277
SL
4839 if (a->esz == 0) {
4840 return false;
4841 }
4842 return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
cda3c753
RH
4843}
4844
3a7be554 4845static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753 4846{
95365277
SL
4847 if (a->esz == 0) {
4848 return false;
4849 }
4850 return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
cda3c753
RH
4851}
4852
3a7be554 4853static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753 4854{
95365277
SL
4855 if (a->esz == 0) {
4856 return false;
4857 }
4858 return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
cda3c753
RH
4859}
4860
3a7be554 4861static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753 4862{
95365277
SL
4863 if (a->esz == 0) {
4864 return false;
4865 }
4866 return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
cda3c753
RH
4867}
4868
3a7be554 4869static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4870{
4871 static gen_helper_gvec_3_ptr * const fns[3] = {
4872 gen_helper_sve_frecpx_h,
4873 gen_helper_sve_frecpx_s,
4874 gen_helper_sve_frecpx_d
4875 };
4876 if (a->esz == 0) {
4877 return false;
4878 }
4879 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4880}
4881
3a7be554 4882static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4883{
4884 static gen_helper_gvec_3_ptr * const fns[3] = {
4885 gen_helper_sve_fsqrt_h,
4886 gen_helper_sve_fsqrt_s,
4887 gen_helper_sve_fsqrt_d
4888 };
4889 if (a->esz == 0) {
4890 return false;
4891 }
4892 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4893}
4894
3a7be554 4895static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4896{
4897 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4898}
4899
3a7be554 4900static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4901{
4902 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4903}
4904
3a7be554 4905static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4906{
4907 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4908}
4909
3a7be554 4910static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4911{
4912 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4913}
4914
3a7be554 4915static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4916{
4917 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4918}
4919
3a7be554 4920static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4921{
4922 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4923}
4924
3a7be554 4925static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4926{
4927 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4928}
4929
3a7be554 4930static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4931{
4932 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4933}
4934
3a7be554 4935static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4936{
4937 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4938}
4939
3a7be554 4940static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4941{
4942 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4943}
4944
3a7be554 4945static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4946{
4947 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4948}
4949
3a7be554 4950static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4951{
4952 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4953}
4954
3a7be554 4955static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4956{
4957 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4958}
4959
3a7be554 4960static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4961{
4962 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4963}
4964
d1822297
RH
4965/*
4966 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4967 */
4968
4969/* Subroutine loading a vector register at VOFS of LEN bytes.
4970 * The load should begin at the address Rn + IMM.
4971 */
4972
19f2acc9 4973static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4974{
19f2acc9
RH
4975 int len_align = QEMU_ALIGN_DOWN(len, 8);
4976 int len_remain = len % 8;
4977 int nparts = len / 8 + ctpop8(len_remain);
d1822297 4978 int midx = get_mem_index(s);
b2aa8879 4979 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4980
b2aa8879
RH
4981 dirty_addr = tcg_temp_new_i64();
4982 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4983 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
b2aa8879 4984 tcg_temp_free_i64(dirty_addr);
d1822297 4985
b2aa8879
RH
4986 /*
4987 * Note that unpredicated load/store of vector/predicate registers
d1822297 4988 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4989 * operations on larger quantities.
d1822297
RH
4990 * Attempt to keep code expansion to a minimum by limiting the
4991 * amount of unrolling done.
4992 */
4993 if (nparts <= 4) {
4994 int i;
4995
b2aa8879 4996 t0 = tcg_temp_new_i64();
d1822297 4997 for (i = 0; i < len_align; i += 8) {
fc313c64 4998 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
d1822297 4999 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 5000 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 5001 }
b2aa8879 5002 tcg_temp_free_i64(t0);
d1822297
RH
5003 } else {
5004 TCGLabel *loop = gen_new_label();
5005 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5006
b2aa8879
RH
5007 /* Copy the clean address into a local temp, live across the loop. */
5008 t0 = clean_addr;
4b4dc975 5009 clean_addr = new_tmp_a64_local(s);
b2aa8879 5010 tcg_gen_mov_i64(clean_addr, t0);
d1822297 5011
b2aa8879 5012 gen_set_label(loop);
d1822297 5013
b2aa8879 5014 t0 = tcg_temp_new_i64();
fc313c64 5015 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
b2aa8879 5016 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 5017
b2aa8879 5018 tp = tcg_temp_new_ptr();
d1822297
RH
5019 tcg_gen_add_ptr(tp, cpu_env, i);
5020 tcg_gen_addi_ptr(i, i, 8);
5021 tcg_gen_st_i64(t0, tp, vofs);
5022 tcg_temp_free_ptr(tp);
b2aa8879 5023 tcg_temp_free_i64(t0);
d1822297
RH
5024
5025 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
5026 tcg_temp_free_ptr(i);
5027 }
5028
b2aa8879
RH
5029 /*
5030 * Predicate register loads can be any multiple of 2.
d1822297
RH
5031 * Note that we still store the entire 64-bit unit into cpu_env.
5032 */
5033 if (len_remain) {
b2aa8879 5034 t0 = tcg_temp_new_i64();
d1822297
RH
5035 switch (len_remain) {
5036 case 2:
5037 case 4:
5038 case 8:
b2aa8879
RH
5039 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
5040 MO_LE | ctz32(len_remain));
d1822297
RH
5041 break;
5042
5043 case 6:
5044 t1 = tcg_temp_new_i64();
b2aa8879
RH
5045 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
5046 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
5048 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
5049 tcg_temp_free_i64(t1);
5050 break;
5051
5052 default:
5053 g_assert_not_reached();
5054 }
5055 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 5056 tcg_temp_free_i64(t0);
d1822297 5057 }
d1822297
RH
5058}
5059
5047c204 5060/* Similarly for stores. */
19f2acc9 5061static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 5062{
19f2acc9
RH
5063 int len_align = QEMU_ALIGN_DOWN(len, 8);
5064 int len_remain = len % 8;
5065 int nparts = len / 8 + ctpop8(len_remain);
5047c204 5066 int midx = get_mem_index(s);
bba87d0a 5067 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 5068
bba87d0a
RH
5069 dirty_addr = tcg_temp_new_i64();
5070 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 5071 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
bba87d0a 5072 tcg_temp_free_i64(dirty_addr);
5047c204
RH
5073
5074 /* Note that unpredicated load/store of vector/predicate registers
5075 * are defined as a stream of bytes, which equates to little-endian
5076 * operations on larger quantities. There is no nice way to force
5077 * a little-endian store for aarch64_be-linux-user out of line.
5078 *
5079 * Attempt to keep code expansion to a minimum by limiting the
5080 * amount of unrolling done.
5081 */
5082 if (nparts <= 4) {
5083 int i;
5084
bba87d0a 5085 t0 = tcg_temp_new_i64();
5047c204
RH
5086 for (i = 0; i < len_align; i += 8) {
5087 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
fc313c64 5088 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
d8227b09 5089 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 5090 }
bba87d0a 5091 tcg_temp_free_i64(t0);
5047c204
RH
5092 } else {
5093 TCGLabel *loop = gen_new_label();
bba87d0a 5094 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 5095
bba87d0a
RH
5096 /* Copy the clean address into a local temp, live across the loop. */
5097 t0 = clean_addr;
4b4dc975 5098 clean_addr = new_tmp_a64_local(s);
bba87d0a 5099 tcg_gen_mov_i64(clean_addr, t0);
5047c204 5100
bba87d0a 5101 gen_set_label(loop);
5047c204 5102
bba87d0a
RH
5103 t0 = tcg_temp_new_i64();
5104 tp = tcg_temp_new_ptr();
5105 tcg_gen_add_ptr(tp, cpu_env, i);
5106 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 5107 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
5108 tcg_temp_free_ptr(tp);
5109
fc313c64 5110 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
bba87d0a
RH
5111 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5112 tcg_temp_free_i64(t0);
5047c204
RH
5113
5114 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
5115 tcg_temp_free_ptr(i);
5116 }
5117
5118 /* Predicate register stores can be any multiple of 2. */
5119 if (len_remain) {
bba87d0a 5120 t0 = tcg_temp_new_i64();
5047c204 5121 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
5122
5123 switch (len_remain) {
5124 case 2:
5125 case 4:
5126 case 8:
bba87d0a
RH
5127 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
5128 MO_LE | ctz32(len_remain));
5047c204
RH
5129 break;
5130
5131 case 6:
bba87d0a
RH
5132 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
5133 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 5134 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 5135 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
5136 break;
5137
5138 default:
5139 g_assert_not_reached();
5140 }
bba87d0a 5141 tcg_temp_free_i64(t0);
5047c204 5142 }
5047c204
RH
5143}
5144
3a7be554 5145static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
5146{
5147 if (sve_access_check(s)) {
5148 int size = vec_full_reg_size(s);
5149 int off = vec_full_reg_offset(s, a->rd);
5150 do_ldr(s, off, size, a->rn, a->imm * size);
5151 }
5152 return true;
5153}
5154
3a7be554 5155static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
5156{
5157 if (sve_access_check(s)) {
5158 int size = pred_full_reg_size(s);
5159 int off = pred_full_reg_offset(s, a->rd);
5160 do_ldr(s, off, size, a->rn, a->imm * size);
5161 }
5162 return true;
5163}
c4e7c493 5164
3a7be554 5165static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
5166{
5167 if (sve_access_check(s)) {
5168 int size = vec_full_reg_size(s);
5169 int off = vec_full_reg_offset(s, a->rd);
5170 do_str(s, off, size, a->rn, a->imm * size);
5171 }
5172 return true;
5173}
5174
3a7be554 5175static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
5176{
5177 if (sve_access_check(s)) {
5178 int size = pred_full_reg_size(s);
5179 int off = pred_full_reg_offset(s, a->rd);
5180 do_str(s, off, size, a->rn, a->imm * size);
5181 }
5182 return true;
5183}
5184
c4e7c493
RH
5185/*
5186 *** SVE Memory - Contiguous Load Group
5187 */
5188
5189/* The memory mode of the dtype. */
14776ab5 5190static const MemOp dtype_mop[16] = {
c4e7c493
RH
5191 MO_UB, MO_UB, MO_UB, MO_UB,
5192 MO_SL, MO_UW, MO_UW, MO_UW,
5193 MO_SW, MO_SW, MO_UL, MO_UL,
fc313c64 5194 MO_SB, MO_SB, MO_SB, MO_UQ
c4e7c493
RH
5195};
5196
5197#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
5198
5199/* The vector element size of dtype. */
5200static const uint8_t dtype_esz[16] = {
5201 0, 1, 2, 3,
5202 3, 1, 2, 3,
5203 3, 2, 2, 3,
5204 3, 2, 1, 3
5205};
5206
5207static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
5208 int dtype, uint32_t mte_n, bool is_write,
5209 gen_helper_gvec_mem *fn)
c4e7c493
RH
5210{
5211 unsigned vsz = vec_full_reg_size(s);
5212 TCGv_ptr t_pg;
206adacf 5213 int desc = 0;
c4e7c493 5214
206adacf
RH
5215 /*
5216 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
5217 * registers as pointers, so encode the regno into the data field.
5218 * For consistency, do this even for LD1.
5219 */
9473d0ec 5220 if (s->mte_active[0]) {
206adacf
RH
5221 int msz = dtype_msz(dtype);
5222
5223 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5224 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5225 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5226 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5227 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
206adacf 5228 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
5229 } else {
5230 addr = clean_data_tbi(s, addr);
206adacf 5231 }
9473d0ec 5232
206adacf 5233 desc = simd_desc(vsz, vsz, zt | desc);
c4e7c493
RH
5234 t_pg = tcg_temp_new_ptr();
5235
5236 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
c6a59b55 5237 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));
c4e7c493
RH
5238
5239 tcg_temp_free_ptr(t_pg);
c4e7c493
RH
5240}
5241
c182c6db
RH
5242/* Indexed by [mte][be][dtype][nreg] */
5243static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
5244 { /* mte inactive, little-endian */
5245 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
5246 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
5247 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
5248 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
5249 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
5250
5251 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
5252 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
5253 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
5254 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
5255 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
5256
5257 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
5258 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
5259 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
5260 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
5261 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
5262
5263 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
5264 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
5265 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
5266 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
5267 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
5268
5269 /* mte inactive, big-endian */
5270 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
5271 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
5272 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
5273 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
5274 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
5275
5276 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
5277 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
5278 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
5279 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
5280 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
5281
5282 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
5283 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
5284 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
5285 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
5286 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
5287
5288 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
5289 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
5290 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
5291 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
5292 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
5293
5294 { /* mte active, little-endian */
5295 { { gen_helper_sve_ld1bb_r_mte,
5296 gen_helper_sve_ld2bb_r_mte,
5297 gen_helper_sve_ld3bb_r_mte,
5298 gen_helper_sve_ld4bb_r_mte },
5299 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
5300 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
5301 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
5302
5303 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
5304 { gen_helper_sve_ld1hh_le_r_mte,
5305 gen_helper_sve_ld2hh_le_r_mte,
5306 gen_helper_sve_ld3hh_le_r_mte,
5307 gen_helper_sve_ld4hh_le_r_mte },
5308 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
5309 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
5310
5311 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
5312 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
5313 { gen_helper_sve_ld1ss_le_r_mte,
5314 gen_helper_sve_ld2ss_le_r_mte,
5315 gen_helper_sve_ld3ss_le_r_mte,
5316 gen_helper_sve_ld4ss_le_r_mte },
5317 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
5318
5319 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
5320 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
5321 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
5322 { gen_helper_sve_ld1dd_le_r_mte,
5323 gen_helper_sve_ld2dd_le_r_mte,
5324 gen_helper_sve_ld3dd_le_r_mte,
5325 gen_helper_sve_ld4dd_le_r_mte } },
5326
5327 /* mte active, big-endian */
5328 { { gen_helper_sve_ld1bb_r_mte,
5329 gen_helper_sve_ld2bb_r_mte,
5330 gen_helper_sve_ld3bb_r_mte,
5331 gen_helper_sve_ld4bb_r_mte },
5332 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
5333 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
5334 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
5335
5336 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
5337 { gen_helper_sve_ld1hh_be_r_mte,
5338 gen_helper_sve_ld2hh_be_r_mte,
5339 gen_helper_sve_ld3hh_be_r_mte,
5340 gen_helper_sve_ld4hh_be_r_mte },
5341 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
5342 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
5343
5344 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
5345 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
5346 { gen_helper_sve_ld1ss_be_r_mte,
5347 gen_helper_sve_ld2ss_be_r_mte,
5348 gen_helper_sve_ld3ss_be_r_mte,
5349 gen_helper_sve_ld4ss_be_r_mte },
5350 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
5351
5352 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
5353 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
5354 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
5355 { gen_helper_sve_ld1dd_be_r_mte,
5356 gen_helper_sve_ld2dd_be_r_mte,
5357 gen_helper_sve_ld3dd_be_r_mte,
5358 gen_helper_sve_ld4dd_be_r_mte } } },
5359};
5360
c4e7c493
RH
5361static void do_ld_zpa(DisasContext *s, int zt, int pg,
5362 TCGv_i64 addr, int dtype, int nreg)
5363{
206adacf 5364 gen_helper_gvec_mem *fn
c182c6db 5365 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 5366
206adacf
RH
5367 /*
5368 * While there are holes in the table, they are not
c4e7c493
RH
5369 * accessible via the instruction encoding.
5370 */
5371 assert(fn != NULL);
206adacf 5372 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
5373}
5374
3a7be554 5375static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
5376{
5377 if (a->rm == 31) {
5378 return false;
5379 }
5380 if (sve_access_check(s)) {
5381 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5382 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
5383 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5384 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5385 }
5386 return true;
5387}
5388
3a7be554 5389static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
5390{
5391 if (sve_access_check(s)) {
5392 int vsz = vec_full_reg_size(s);
5393 int elements = vsz >> dtype_esz[a->dtype];
5394 TCGv_i64 addr = new_tmp_a64(s);
5395
5396 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5397 (a->imm * elements * (a->nreg + 1))
5398 << dtype_msz(a->dtype));
5399 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5400 }
5401 return true;
5402}
e2654d75 5403
3a7be554 5404static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 5405{
aa13f7c3
RH
5406 static gen_helper_gvec_mem * const fns[2][2][16] = {
5407 { /* mte inactive, little-endian */
5408 { gen_helper_sve_ldff1bb_r,
5409 gen_helper_sve_ldff1bhu_r,
5410 gen_helper_sve_ldff1bsu_r,
5411 gen_helper_sve_ldff1bdu_r,
5412
5413 gen_helper_sve_ldff1sds_le_r,
5414 gen_helper_sve_ldff1hh_le_r,
5415 gen_helper_sve_ldff1hsu_le_r,
5416 gen_helper_sve_ldff1hdu_le_r,
5417
5418 gen_helper_sve_ldff1hds_le_r,
5419 gen_helper_sve_ldff1hss_le_r,
5420 gen_helper_sve_ldff1ss_le_r,
5421 gen_helper_sve_ldff1sdu_le_r,
5422
5423 gen_helper_sve_ldff1bds_r,
5424 gen_helper_sve_ldff1bss_r,
5425 gen_helper_sve_ldff1bhs_r,
5426 gen_helper_sve_ldff1dd_le_r },
5427
5428 /* mte inactive, big-endian */
5429 { gen_helper_sve_ldff1bb_r,
5430 gen_helper_sve_ldff1bhu_r,
5431 gen_helper_sve_ldff1bsu_r,
5432 gen_helper_sve_ldff1bdu_r,
5433
5434 gen_helper_sve_ldff1sds_be_r,
5435 gen_helper_sve_ldff1hh_be_r,
5436 gen_helper_sve_ldff1hsu_be_r,
5437 gen_helper_sve_ldff1hdu_be_r,
5438
5439 gen_helper_sve_ldff1hds_be_r,
5440 gen_helper_sve_ldff1hss_be_r,
5441 gen_helper_sve_ldff1ss_be_r,
5442 gen_helper_sve_ldff1sdu_be_r,
5443
5444 gen_helper_sve_ldff1bds_r,
5445 gen_helper_sve_ldff1bss_r,
5446 gen_helper_sve_ldff1bhs_r,
5447 gen_helper_sve_ldff1dd_be_r } },
5448
5449 { /* mte active, little-endian */
5450 { gen_helper_sve_ldff1bb_r_mte,
5451 gen_helper_sve_ldff1bhu_r_mte,
5452 gen_helper_sve_ldff1bsu_r_mte,
5453 gen_helper_sve_ldff1bdu_r_mte,
5454
5455 gen_helper_sve_ldff1sds_le_r_mte,
5456 gen_helper_sve_ldff1hh_le_r_mte,
5457 gen_helper_sve_ldff1hsu_le_r_mte,
5458 gen_helper_sve_ldff1hdu_le_r_mte,
5459
5460 gen_helper_sve_ldff1hds_le_r_mte,
5461 gen_helper_sve_ldff1hss_le_r_mte,
5462 gen_helper_sve_ldff1ss_le_r_mte,
5463 gen_helper_sve_ldff1sdu_le_r_mte,
5464
5465 gen_helper_sve_ldff1bds_r_mte,
5466 gen_helper_sve_ldff1bss_r_mte,
5467 gen_helper_sve_ldff1bhs_r_mte,
5468 gen_helper_sve_ldff1dd_le_r_mte },
5469
5470 /* mte active, big-endian */
5471 { gen_helper_sve_ldff1bb_r_mte,
5472 gen_helper_sve_ldff1bhu_r_mte,
5473 gen_helper_sve_ldff1bsu_r_mte,
5474 gen_helper_sve_ldff1bdu_r_mte,
5475
5476 gen_helper_sve_ldff1sds_be_r_mte,
5477 gen_helper_sve_ldff1hh_be_r_mte,
5478 gen_helper_sve_ldff1hsu_be_r_mte,
5479 gen_helper_sve_ldff1hdu_be_r_mte,
5480
5481 gen_helper_sve_ldff1hds_be_r_mte,
5482 gen_helper_sve_ldff1hss_be_r_mte,
5483 gen_helper_sve_ldff1ss_be_r_mte,
5484 gen_helper_sve_ldff1sdu_be_r_mte,
5485
5486 gen_helper_sve_ldff1bds_r_mte,
5487 gen_helper_sve_ldff1bss_r_mte,
5488 gen_helper_sve_ldff1bhs_r_mte,
5489 gen_helper_sve_ldff1dd_be_r_mte } },
e2654d75
RH
5490 };
5491
5492 if (sve_access_check(s)) {
5493 TCGv_i64 addr = new_tmp_a64(s);
5494 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5495 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
aa13f7c3
RH
5496 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5497 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
5498 }
5499 return true;
5500}
5501
3a7be554 5502static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 5503{
aa13f7c3
RH
5504 static gen_helper_gvec_mem * const fns[2][2][16] = {
5505 { /* mte inactive, little-endian */
5506 { gen_helper_sve_ldnf1bb_r,
5507 gen_helper_sve_ldnf1bhu_r,
5508 gen_helper_sve_ldnf1bsu_r,
5509 gen_helper_sve_ldnf1bdu_r,
5510
5511 gen_helper_sve_ldnf1sds_le_r,
5512 gen_helper_sve_ldnf1hh_le_r,
5513 gen_helper_sve_ldnf1hsu_le_r,
5514 gen_helper_sve_ldnf1hdu_le_r,
5515
5516 gen_helper_sve_ldnf1hds_le_r,
5517 gen_helper_sve_ldnf1hss_le_r,
5518 gen_helper_sve_ldnf1ss_le_r,
5519 gen_helper_sve_ldnf1sdu_le_r,
5520
5521 gen_helper_sve_ldnf1bds_r,
5522 gen_helper_sve_ldnf1bss_r,
5523 gen_helper_sve_ldnf1bhs_r,
5524 gen_helper_sve_ldnf1dd_le_r },
5525
5526 /* mte inactive, big-endian */
5527 { gen_helper_sve_ldnf1bb_r,
5528 gen_helper_sve_ldnf1bhu_r,
5529 gen_helper_sve_ldnf1bsu_r,
5530 gen_helper_sve_ldnf1bdu_r,
5531
5532 gen_helper_sve_ldnf1sds_be_r,
5533 gen_helper_sve_ldnf1hh_be_r,
5534 gen_helper_sve_ldnf1hsu_be_r,
5535 gen_helper_sve_ldnf1hdu_be_r,
5536
5537 gen_helper_sve_ldnf1hds_be_r,
5538 gen_helper_sve_ldnf1hss_be_r,
5539 gen_helper_sve_ldnf1ss_be_r,
5540 gen_helper_sve_ldnf1sdu_be_r,
5541
5542 gen_helper_sve_ldnf1bds_r,
5543 gen_helper_sve_ldnf1bss_r,
5544 gen_helper_sve_ldnf1bhs_r,
5545 gen_helper_sve_ldnf1dd_be_r } },
5546
5547 { /* mte inactive, little-endian */
5548 { gen_helper_sve_ldnf1bb_r_mte,
5549 gen_helper_sve_ldnf1bhu_r_mte,
5550 gen_helper_sve_ldnf1bsu_r_mte,
5551 gen_helper_sve_ldnf1bdu_r_mte,
5552
5553 gen_helper_sve_ldnf1sds_le_r_mte,
5554 gen_helper_sve_ldnf1hh_le_r_mte,
5555 gen_helper_sve_ldnf1hsu_le_r_mte,
5556 gen_helper_sve_ldnf1hdu_le_r_mte,
5557
5558 gen_helper_sve_ldnf1hds_le_r_mte,
5559 gen_helper_sve_ldnf1hss_le_r_mte,
5560 gen_helper_sve_ldnf1ss_le_r_mte,
5561 gen_helper_sve_ldnf1sdu_le_r_mte,
5562
5563 gen_helper_sve_ldnf1bds_r_mte,
5564 gen_helper_sve_ldnf1bss_r_mte,
5565 gen_helper_sve_ldnf1bhs_r_mte,
5566 gen_helper_sve_ldnf1dd_le_r_mte },
5567
5568 /* mte inactive, big-endian */
5569 { gen_helper_sve_ldnf1bb_r_mte,
5570 gen_helper_sve_ldnf1bhu_r_mte,
5571 gen_helper_sve_ldnf1bsu_r_mte,
5572 gen_helper_sve_ldnf1bdu_r_mte,
5573
5574 gen_helper_sve_ldnf1sds_be_r_mte,
5575 gen_helper_sve_ldnf1hh_be_r_mte,
5576 gen_helper_sve_ldnf1hsu_be_r_mte,
5577 gen_helper_sve_ldnf1hdu_be_r_mte,
5578
5579 gen_helper_sve_ldnf1hds_be_r_mte,
5580 gen_helper_sve_ldnf1hss_be_r_mte,
5581 gen_helper_sve_ldnf1ss_be_r_mte,
5582 gen_helper_sve_ldnf1sdu_be_r_mte,
5583
5584 gen_helper_sve_ldnf1bds_r_mte,
5585 gen_helper_sve_ldnf1bss_r_mte,
5586 gen_helper_sve_ldnf1bhs_r_mte,
5587 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
5588 };
5589
5590 if (sve_access_check(s)) {
5591 int vsz = vec_full_reg_size(s);
5592 int elements = vsz >> dtype_esz[a->dtype];
5593 int off = (a->imm * elements) << dtype_msz(a->dtype);
5594 TCGv_i64 addr = new_tmp_a64(s);
5595
5596 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
5597 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5598 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
5599 }
5600 return true;
5601}
1a039c7e 5602
c182c6db 5603static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
05abe304 5604{
05abe304
RH
5605 unsigned vsz = vec_full_reg_size(s);
5606 TCGv_ptr t_pg;
7924d239 5607 int poff;
05abe304
RH
5608
5609 /* Load the first quadword using the normal predicated load helpers. */
2a99ab2b
RH
5610 poff = pred_full_reg_offset(s, pg);
5611 if (vsz > 16) {
5612 /*
5613 * Zero-extend the first 16 bits of the predicate into a temporary.
5614 * This avoids triggering an assert making sure we don't have bits
5615 * set within a predicate beyond VQ, but we have lowered VQ to 1
5616 * for this load operation.
5617 */
5618 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5619#if HOST_BIG_ENDIAN
2a99ab2b
RH
5620 poff += 6;
5621#endif
5622 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5623
5624 poff = offsetof(CPUARMState, vfp.preg_tmp);
5625 tcg_gen_st_i64(tmp, cpu_env, poff);
5626 tcg_temp_free_i64(tmp);
5627 }
5628
05abe304 5629 t_pg = tcg_temp_new_ptr();
2a99ab2b 5630 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 5631
c182c6db
RH
5632 gen_helper_gvec_mem *fn
5633 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
7924d239 5634 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
05abe304
RH
5635
5636 tcg_temp_free_ptr(t_pg);
05abe304
RH
5637
5638 /* Replicate that first quadword. */
5639 if (vsz > 16) {
7924d239
RH
5640 int doff = vec_full_reg_offset(s, zt);
5641 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
05abe304
RH
5642 }
5643}
5644
3a7be554 5645static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5646{
5647 if (a->rm == 31) {
5648 return false;
5649 }
5650 if (sve_access_check(s)) {
5651 int msz = dtype_msz(a->dtype);
5652 TCGv_i64 addr = new_tmp_a64(s);
5653 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5654 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5655 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5656 }
5657 return true;
5658}
5659
3a7be554 5660static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5661{
5662 if (sve_access_check(s)) {
5663 TCGv_i64 addr = new_tmp_a64(s);
5664 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5665 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5666 }
5667 return true;
5668}
5669
12c563f6
RH
5670static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
5671{
5672 unsigned vsz = vec_full_reg_size(s);
5673 unsigned vsz_r32;
5674 TCGv_ptr t_pg;
5675 int poff, doff;
5676
5677 if (vsz < 32) {
5678 /*
5679 * Note that this UNDEFINED check comes after CheckSVEEnabled()
5680 * in the ARM pseudocode, which is the sve_access_check() done
5681 * in our caller. We should not now return false from the caller.
5682 */
5683 unallocated_encoding(s);
5684 return;
5685 }
5686
5687 /* Load the first octaword using the normal predicated load helpers. */
5688
5689 poff = pred_full_reg_offset(s, pg);
5690 if (vsz > 32) {
5691 /*
5692 * Zero-extend the first 32 bits of the predicate into a temporary.
5693 * This avoids triggering an assert making sure we don't have bits
5694 * set within a predicate beyond VQ, but we have lowered VQ to 2
5695 * for this load operation.
5696 */
5697 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5698#if HOST_BIG_ENDIAN
12c563f6
RH
5699 poff += 4;
5700#endif
5701 tcg_gen_ld32u_i64(tmp, cpu_env, poff);
5702
5703 poff = offsetof(CPUARMState, vfp.preg_tmp);
5704 tcg_gen_st_i64(tmp, cpu_env, poff);
5705 tcg_temp_free_i64(tmp);
5706 }
5707
5708 t_pg = tcg_temp_new_ptr();
5709 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
5710
5711 gen_helper_gvec_mem *fn
5712 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
5713 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
5714
5715 tcg_temp_free_ptr(t_pg);
5716
5717 /*
5718 * Replicate that first octaword.
5719 * The replication happens in units of 32; if the full vector size
5720 * is not a multiple of 32, the final bits are zeroed.
5721 */
5722 doff = vec_full_reg_offset(s, zt);
5723 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
5724 if (vsz >= 64) {
5725 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
5726 }
5727 vsz -= vsz_r32;
5728 if (vsz) {
5729 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
5730 }
5731}
5732
5733static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5734{
5735 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5736 return false;
5737 }
5738 if (a->rm == 31) {
5739 return false;
5740 }
5741 if (sve_access_check(s)) {
5742 TCGv_i64 addr = new_tmp_a64(s);
5743 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5744 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5745 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5746 }
5747 return true;
5748}
5749
5750static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5751{
5752 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5753 return false;
5754 }
5755 if (sve_access_check(s)) {
5756 TCGv_i64 addr = new_tmp_a64(s);
5757 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5758 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5759 }
5760 return true;
5761}
5762
68459864 5763/* Load and broadcast element. */
3a7be554 5764static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 5765{
68459864
RH
5766 unsigned vsz = vec_full_reg_size(s);
5767 unsigned psz = pred_full_reg_size(s);
5768 unsigned esz = dtype_esz[a->dtype];
d0e372b0 5769 unsigned msz = dtype_msz(a->dtype);
c0ed9166 5770 TCGLabel *over;
4ac430e1 5771 TCGv_i64 temp, clean_addr;
68459864 5772
c0ed9166
RH
5773 if (!sve_access_check(s)) {
5774 return true;
5775 }
5776
5777 over = gen_new_label();
5778
68459864
RH
5779 /* If the guarding predicate has no bits set, no load occurs. */
5780 if (psz <= 8) {
5781 /* Reduce the pred_esz_masks value simply to reduce the
5782 * size of the code generated here.
5783 */
5784 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5785 temp = tcg_temp_new_i64();
5786 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5787 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5788 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5789 tcg_temp_free_i64(temp);
5790 } else {
5791 TCGv_i32 t32 = tcg_temp_new_i32();
5792 find_last_active(s, t32, esz, a->pg);
5793 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5794 tcg_temp_free_i32(t32);
5795 }
5796
5797 /* Load the data. */
5798 temp = tcg_temp_new_i64();
d0e372b0 5799 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5800 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5801
5802 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
0ca0f872 5803 finalize_memop(s, dtype_mop[a->dtype]));
68459864
RH
5804
5805 /* Broadcast to *all* elements. */
5806 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5807 vsz, vsz, temp);
5808 tcg_temp_free_i64(temp);
5809
5810 /* Zero the inactive elements. */
5811 gen_set_label(over);
60245996 5812 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5813}
5814
1a039c7e
RH
5815static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5816 int msz, int esz, int nreg)
5817{
71b9f394
RH
5818 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5819 { { { gen_helper_sve_st1bb_r,
5820 gen_helper_sve_st1bh_r,
5821 gen_helper_sve_st1bs_r,
5822 gen_helper_sve_st1bd_r },
5823 { NULL,
5824 gen_helper_sve_st1hh_le_r,
5825 gen_helper_sve_st1hs_le_r,
5826 gen_helper_sve_st1hd_le_r },
5827 { NULL, NULL,
5828 gen_helper_sve_st1ss_le_r,
5829 gen_helper_sve_st1sd_le_r },
5830 { NULL, NULL, NULL,
5831 gen_helper_sve_st1dd_le_r } },
5832 { { gen_helper_sve_st1bb_r,
5833 gen_helper_sve_st1bh_r,
5834 gen_helper_sve_st1bs_r,
5835 gen_helper_sve_st1bd_r },
5836 { NULL,
5837 gen_helper_sve_st1hh_be_r,
5838 gen_helper_sve_st1hs_be_r,
5839 gen_helper_sve_st1hd_be_r },
5840 { NULL, NULL,
5841 gen_helper_sve_st1ss_be_r,
5842 gen_helper_sve_st1sd_be_r },
5843 { NULL, NULL, NULL,
5844 gen_helper_sve_st1dd_be_r } } },
5845
5846 { { { gen_helper_sve_st1bb_r_mte,
5847 gen_helper_sve_st1bh_r_mte,
5848 gen_helper_sve_st1bs_r_mte,
5849 gen_helper_sve_st1bd_r_mte },
5850 { NULL,
5851 gen_helper_sve_st1hh_le_r_mte,
5852 gen_helper_sve_st1hs_le_r_mte,
5853 gen_helper_sve_st1hd_le_r_mte },
5854 { NULL, NULL,
5855 gen_helper_sve_st1ss_le_r_mte,
5856 gen_helper_sve_st1sd_le_r_mte },
5857 { NULL, NULL, NULL,
5858 gen_helper_sve_st1dd_le_r_mte } },
5859 { { gen_helper_sve_st1bb_r_mte,
5860 gen_helper_sve_st1bh_r_mte,
5861 gen_helper_sve_st1bs_r_mte,
5862 gen_helper_sve_st1bd_r_mte },
5863 { NULL,
5864 gen_helper_sve_st1hh_be_r_mte,
5865 gen_helper_sve_st1hs_be_r_mte,
5866 gen_helper_sve_st1hd_be_r_mte },
5867 { NULL, NULL,
5868 gen_helper_sve_st1ss_be_r_mte,
5869 gen_helper_sve_st1sd_be_r_mte },
5870 { NULL, NULL, NULL,
5871 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5872 };
71b9f394
RH
5873 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5874 { { { gen_helper_sve_st2bb_r,
5875 gen_helper_sve_st2hh_le_r,
5876 gen_helper_sve_st2ss_le_r,
5877 gen_helper_sve_st2dd_le_r },
5878 { gen_helper_sve_st3bb_r,
5879 gen_helper_sve_st3hh_le_r,
5880 gen_helper_sve_st3ss_le_r,
5881 gen_helper_sve_st3dd_le_r },
5882 { gen_helper_sve_st4bb_r,
5883 gen_helper_sve_st4hh_le_r,
5884 gen_helper_sve_st4ss_le_r,
5885 gen_helper_sve_st4dd_le_r } },
5886 { { gen_helper_sve_st2bb_r,
5887 gen_helper_sve_st2hh_be_r,
5888 gen_helper_sve_st2ss_be_r,
5889 gen_helper_sve_st2dd_be_r },
5890 { gen_helper_sve_st3bb_r,
5891 gen_helper_sve_st3hh_be_r,
5892 gen_helper_sve_st3ss_be_r,
5893 gen_helper_sve_st3dd_be_r },
5894 { gen_helper_sve_st4bb_r,
5895 gen_helper_sve_st4hh_be_r,
5896 gen_helper_sve_st4ss_be_r,
5897 gen_helper_sve_st4dd_be_r } } },
5898 { { { gen_helper_sve_st2bb_r_mte,
5899 gen_helper_sve_st2hh_le_r_mte,
5900 gen_helper_sve_st2ss_le_r_mte,
5901 gen_helper_sve_st2dd_le_r_mte },
5902 { gen_helper_sve_st3bb_r_mte,
5903 gen_helper_sve_st3hh_le_r_mte,
5904 gen_helper_sve_st3ss_le_r_mte,
5905 gen_helper_sve_st3dd_le_r_mte },
5906 { gen_helper_sve_st4bb_r_mte,
5907 gen_helper_sve_st4hh_le_r_mte,
5908 gen_helper_sve_st4ss_le_r_mte,
5909 gen_helper_sve_st4dd_le_r_mte } },
5910 { { gen_helper_sve_st2bb_r_mte,
5911 gen_helper_sve_st2hh_be_r_mte,
5912 gen_helper_sve_st2ss_be_r_mte,
5913 gen_helper_sve_st2dd_be_r_mte },
5914 { gen_helper_sve_st3bb_r_mte,
5915 gen_helper_sve_st3hh_be_r_mte,
5916 gen_helper_sve_st3ss_be_r_mte,
5917 gen_helper_sve_st3dd_be_r_mte },
5918 { gen_helper_sve_st4bb_r_mte,
5919 gen_helper_sve_st4hh_be_r_mte,
5920 gen_helper_sve_st4ss_be_r_mte,
5921 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5922 };
5923 gen_helper_gvec_mem *fn;
28d57f2d 5924 int be = s->be_data == MO_BE;
1a039c7e
RH
5925
5926 if (nreg == 0) {
5927 /* ST1 */
71b9f394
RH
5928 fn = fn_single[s->mte_active[0]][be][msz][esz];
5929 nreg = 1;
1a039c7e
RH
5930 } else {
5931 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5932 assert(msz == esz);
71b9f394 5933 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5934 }
5935 assert(fn != NULL);
71b9f394 5936 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5937}
5938
3a7be554 5939static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5940{
5941 if (a->rm == 31 || a->msz > a->esz) {
5942 return false;
5943 }
5944 if (sve_access_check(s)) {
5945 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5946 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5947 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5948 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5949 }
5950 return true;
5951}
5952
3a7be554 5953static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5954{
5955 if (a->msz > a->esz) {
5956 return false;
5957 }
5958 if (sve_access_check(s)) {
5959 int vsz = vec_full_reg_size(s);
5960 int elements = vsz >> a->esz;
5961 TCGv_i64 addr = new_tmp_a64(s);
5962
5963 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5964 (a->imm * elements * (a->nreg + 1)) << a->msz);
5965 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5966 }
5967 return true;
5968}
f6dbf62a
RH
5969
5970/*
5971 *** SVE gather loads / scatter stores
5972 */
5973
500d0484 5974static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5975 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5976 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5977{
5978 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5979 TCGv_ptr t_zm = tcg_temp_new_ptr();
5980 TCGv_ptr t_pg = tcg_temp_new_ptr();
5981 TCGv_ptr t_zt = tcg_temp_new_ptr();
d28d12f0 5982 int desc = 0;
500d0484 5983
d28d12f0
RH
5984 if (s->mte_active[0]) {
5985 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5986 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5987 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5988 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5989 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
5990 desc <<= SVE_MTEDESC_SHIFT;
5991 }
cdecb3fc 5992 desc = simd_desc(vsz, vsz, desc | scale);
f6dbf62a
RH
5993
5994 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5995 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5996 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
c6a59b55 5997 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
f6dbf62a
RH
5998
5999 tcg_temp_free_ptr(t_zt);
6000 tcg_temp_free_ptr(t_zm);
6001 tcg_temp_free_ptr(t_pg);
f6dbf62a
RH
6002}
6003
d28d12f0
RH
6004/* Indexed by [mte][be][ff][xs][u][msz]. */
6005static gen_helper_gvec_mem_scatter * const
6006gather_load_fn32[2][2][2][2][2][3] = {
6007 { /* MTE Inactive */
6008 { /* Little-endian */
6009 { { { gen_helper_sve_ldbss_zsu,
6010 gen_helper_sve_ldhss_le_zsu,
6011 NULL, },
6012 { gen_helper_sve_ldbsu_zsu,
6013 gen_helper_sve_ldhsu_le_zsu,
6014 gen_helper_sve_ldss_le_zsu, } },
6015 { { gen_helper_sve_ldbss_zss,
6016 gen_helper_sve_ldhss_le_zss,
6017 NULL, },
6018 { gen_helper_sve_ldbsu_zss,
6019 gen_helper_sve_ldhsu_le_zss,
6020 gen_helper_sve_ldss_le_zss, } } },
6021
6022 /* First-fault */
6023 { { { gen_helper_sve_ldffbss_zsu,
6024 gen_helper_sve_ldffhss_le_zsu,
6025 NULL, },
6026 { gen_helper_sve_ldffbsu_zsu,
6027 gen_helper_sve_ldffhsu_le_zsu,
6028 gen_helper_sve_ldffss_le_zsu, } },
6029 { { gen_helper_sve_ldffbss_zss,
6030 gen_helper_sve_ldffhss_le_zss,
6031 NULL, },
6032 { gen_helper_sve_ldffbsu_zss,
6033 gen_helper_sve_ldffhsu_le_zss,
6034 gen_helper_sve_ldffss_le_zss, } } } },
6035
6036 { /* Big-endian */
6037 { { { gen_helper_sve_ldbss_zsu,
6038 gen_helper_sve_ldhss_be_zsu,
6039 NULL, },
6040 { gen_helper_sve_ldbsu_zsu,
6041 gen_helper_sve_ldhsu_be_zsu,
6042 gen_helper_sve_ldss_be_zsu, } },
6043 { { gen_helper_sve_ldbss_zss,
6044 gen_helper_sve_ldhss_be_zss,
6045 NULL, },
6046 { gen_helper_sve_ldbsu_zss,
6047 gen_helper_sve_ldhsu_be_zss,
6048 gen_helper_sve_ldss_be_zss, } } },
6049
6050 /* First-fault */
6051 { { { gen_helper_sve_ldffbss_zsu,
6052 gen_helper_sve_ldffhss_be_zsu,
6053 NULL, },
6054 { gen_helper_sve_ldffbsu_zsu,
6055 gen_helper_sve_ldffhsu_be_zsu,
6056 gen_helper_sve_ldffss_be_zsu, } },
6057 { { gen_helper_sve_ldffbss_zss,
6058 gen_helper_sve_ldffhss_be_zss,
6059 NULL, },
6060 { gen_helper_sve_ldffbsu_zss,
6061 gen_helper_sve_ldffhsu_be_zss,
6062 gen_helper_sve_ldffss_be_zss, } } } } },
6063 { /* MTE Active */
6064 { /* Little-endian */
6065 { { { gen_helper_sve_ldbss_zsu_mte,
6066 gen_helper_sve_ldhss_le_zsu_mte,
6067 NULL, },
6068 { gen_helper_sve_ldbsu_zsu_mte,
6069 gen_helper_sve_ldhsu_le_zsu_mte,
6070 gen_helper_sve_ldss_le_zsu_mte, } },
6071 { { gen_helper_sve_ldbss_zss_mte,
6072 gen_helper_sve_ldhss_le_zss_mte,
6073 NULL, },
6074 { gen_helper_sve_ldbsu_zss_mte,
6075 gen_helper_sve_ldhsu_le_zss_mte,
6076 gen_helper_sve_ldss_le_zss_mte, } } },
6077
6078 /* First-fault */
6079 { { { gen_helper_sve_ldffbss_zsu_mte,
6080 gen_helper_sve_ldffhss_le_zsu_mte,
6081 NULL, },
6082 { gen_helper_sve_ldffbsu_zsu_mte,
6083 gen_helper_sve_ldffhsu_le_zsu_mte,
6084 gen_helper_sve_ldffss_le_zsu_mte, } },
6085 { { gen_helper_sve_ldffbss_zss_mte,
6086 gen_helper_sve_ldffhss_le_zss_mte,
6087 NULL, },
6088 { gen_helper_sve_ldffbsu_zss_mte,
6089 gen_helper_sve_ldffhsu_le_zss_mte,
6090 gen_helper_sve_ldffss_le_zss_mte, } } } },
6091
6092 { /* Big-endian */
6093 { { { gen_helper_sve_ldbss_zsu_mte,
6094 gen_helper_sve_ldhss_be_zsu_mte,
6095 NULL, },
6096 { gen_helper_sve_ldbsu_zsu_mte,
6097 gen_helper_sve_ldhsu_be_zsu_mte,
6098 gen_helper_sve_ldss_be_zsu_mte, } },
6099 { { gen_helper_sve_ldbss_zss_mte,
6100 gen_helper_sve_ldhss_be_zss_mte,
6101 NULL, },
6102 { gen_helper_sve_ldbsu_zss_mte,
6103 gen_helper_sve_ldhsu_be_zss_mte,
6104 gen_helper_sve_ldss_be_zss_mte, } } },
6105
6106 /* First-fault */
6107 { { { gen_helper_sve_ldffbss_zsu_mte,
6108 gen_helper_sve_ldffhss_be_zsu_mte,
6109 NULL, },
6110 { gen_helper_sve_ldffbsu_zsu_mte,
6111 gen_helper_sve_ldffhsu_be_zsu_mte,
6112 gen_helper_sve_ldffss_be_zsu_mte, } },
6113 { { gen_helper_sve_ldffbss_zss_mte,
6114 gen_helper_sve_ldffhss_be_zss_mte,
6115 NULL, },
6116 { gen_helper_sve_ldffbsu_zss_mte,
6117 gen_helper_sve_ldffhsu_be_zss_mte,
6118 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
6119};
6120
6121/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
6122static gen_helper_gvec_mem_scatter * const
6123gather_load_fn64[2][2][2][3][2][4] = {
6124 { /* MTE Inactive */
6125 { /* Little-endian */
6126 { { { gen_helper_sve_ldbds_zsu,
6127 gen_helper_sve_ldhds_le_zsu,
6128 gen_helper_sve_ldsds_le_zsu,
6129 NULL, },
6130 { gen_helper_sve_ldbdu_zsu,
6131 gen_helper_sve_ldhdu_le_zsu,
6132 gen_helper_sve_ldsdu_le_zsu,
6133 gen_helper_sve_lddd_le_zsu, } },
6134 { { gen_helper_sve_ldbds_zss,
6135 gen_helper_sve_ldhds_le_zss,
6136 gen_helper_sve_ldsds_le_zss,
6137 NULL, },
6138 { gen_helper_sve_ldbdu_zss,
6139 gen_helper_sve_ldhdu_le_zss,
6140 gen_helper_sve_ldsdu_le_zss,
6141 gen_helper_sve_lddd_le_zss, } },
6142 { { gen_helper_sve_ldbds_zd,
6143 gen_helper_sve_ldhds_le_zd,
6144 gen_helper_sve_ldsds_le_zd,
6145 NULL, },
6146 { gen_helper_sve_ldbdu_zd,
6147 gen_helper_sve_ldhdu_le_zd,
6148 gen_helper_sve_ldsdu_le_zd,
6149 gen_helper_sve_lddd_le_zd, } } },
6150
6151 /* First-fault */
6152 { { { gen_helper_sve_ldffbds_zsu,
6153 gen_helper_sve_ldffhds_le_zsu,
6154 gen_helper_sve_ldffsds_le_zsu,
6155 NULL, },
6156 { gen_helper_sve_ldffbdu_zsu,
6157 gen_helper_sve_ldffhdu_le_zsu,
6158 gen_helper_sve_ldffsdu_le_zsu,
6159 gen_helper_sve_ldffdd_le_zsu, } },
6160 { { gen_helper_sve_ldffbds_zss,
6161 gen_helper_sve_ldffhds_le_zss,
6162 gen_helper_sve_ldffsds_le_zss,
6163 NULL, },
6164 { gen_helper_sve_ldffbdu_zss,
6165 gen_helper_sve_ldffhdu_le_zss,
6166 gen_helper_sve_ldffsdu_le_zss,
6167 gen_helper_sve_ldffdd_le_zss, } },
6168 { { gen_helper_sve_ldffbds_zd,
6169 gen_helper_sve_ldffhds_le_zd,
6170 gen_helper_sve_ldffsds_le_zd,
6171 NULL, },
6172 { gen_helper_sve_ldffbdu_zd,
6173 gen_helper_sve_ldffhdu_le_zd,
6174 gen_helper_sve_ldffsdu_le_zd,
6175 gen_helper_sve_ldffdd_le_zd, } } } },
6176 { /* Big-endian */
6177 { { { gen_helper_sve_ldbds_zsu,
6178 gen_helper_sve_ldhds_be_zsu,
6179 gen_helper_sve_ldsds_be_zsu,
6180 NULL, },
6181 { gen_helper_sve_ldbdu_zsu,
6182 gen_helper_sve_ldhdu_be_zsu,
6183 gen_helper_sve_ldsdu_be_zsu,
6184 gen_helper_sve_lddd_be_zsu, } },
6185 { { gen_helper_sve_ldbds_zss,
6186 gen_helper_sve_ldhds_be_zss,
6187 gen_helper_sve_ldsds_be_zss,
6188 NULL, },
6189 { gen_helper_sve_ldbdu_zss,
6190 gen_helper_sve_ldhdu_be_zss,
6191 gen_helper_sve_ldsdu_be_zss,
6192 gen_helper_sve_lddd_be_zss, } },
6193 { { gen_helper_sve_ldbds_zd,
6194 gen_helper_sve_ldhds_be_zd,
6195 gen_helper_sve_ldsds_be_zd,
6196 NULL, },
6197 { gen_helper_sve_ldbdu_zd,
6198 gen_helper_sve_ldhdu_be_zd,
6199 gen_helper_sve_ldsdu_be_zd,
6200 gen_helper_sve_lddd_be_zd, } } },
6201
6202 /* First-fault */
6203 { { { gen_helper_sve_ldffbds_zsu,
6204 gen_helper_sve_ldffhds_be_zsu,
6205 gen_helper_sve_ldffsds_be_zsu,
6206 NULL, },
6207 { gen_helper_sve_ldffbdu_zsu,
6208 gen_helper_sve_ldffhdu_be_zsu,
6209 gen_helper_sve_ldffsdu_be_zsu,
6210 gen_helper_sve_ldffdd_be_zsu, } },
6211 { { gen_helper_sve_ldffbds_zss,
6212 gen_helper_sve_ldffhds_be_zss,
6213 gen_helper_sve_ldffsds_be_zss,
6214 NULL, },
6215 { gen_helper_sve_ldffbdu_zss,
6216 gen_helper_sve_ldffhdu_be_zss,
6217 gen_helper_sve_ldffsdu_be_zss,
6218 gen_helper_sve_ldffdd_be_zss, } },
6219 { { gen_helper_sve_ldffbds_zd,
6220 gen_helper_sve_ldffhds_be_zd,
6221 gen_helper_sve_ldffsds_be_zd,
6222 NULL, },
6223 { gen_helper_sve_ldffbdu_zd,
6224 gen_helper_sve_ldffhdu_be_zd,
6225 gen_helper_sve_ldffsdu_be_zd,
6226 gen_helper_sve_ldffdd_be_zd, } } } } },
6227 { /* MTE Active */
6228 { /* Little-endian */
6229 { { { gen_helper_sve_ldbds_zsu_mte,
6230 gen_helper_sve_ldhds_le_zsu_mte,
6231 gen_helper_sve_ldsds_le_zsu_mte,
6232 NULL, },
6233 { gen_helper_sve_ldbdu_zsu_mte,
6234 gen_helper_sve_ldhdu_le_zsu_mte,
6235 gen_helper_sve_ldsdu_le_zsu_mte,
6236 gen_helper_sve_lddd_le_zsu_mte, } },
6237 { { gen_helper_sve_ldbds_zss_mte,
6238 gen_helper_sve_ldhds_le_zss_mte,
6239 gen_helper_sve_ldsds_le_zss_mte,
6240 NULL, },
6241 { gen_helper_sve_ldbdu_zss_mte,
6242 gen_helper_sve_ldhdu_le_zss_mte,
6243 gen_helper_sve_ldsdu_le_zss_mte,
6244 gen_helper_sve_lddd_le_zss_mte, } },
6245 { { gen_helper_sve_ldbds_zd_mte,
6246 gen_helper_sve_ldhds_le_zd_mte,
6247 gen_helper_sve_ldsds_le_zd_mte,
6248 NULL, },
6249 { gen_helper_sve_ldbdu_zd_mte,
6250 gen_helper_sve_ldhdu_le_zd_mte,
6251 gen_helper_sve_ldsdu_le_zd_mte,
6252 gen_helper_sve_lddd_le_zd_mte, } } },
6253
6254 /* First-fault */
6255 { { { gen_helper_sve_ldffbds_zsu_mte,
6256 gen_helper_sve_ldffhds_le_zsu_mte,
6257 gen_helper_sve_ldffsds_le_zsu_mte,
6258 NULL, },
6259 { gen_helper_sve_ldffbdu_zsu_mte,
6260 gen_helper_sve_ldffhdu_le_zsu_mte,
6261 gen_helper_sve_ldffsdu_le_zsu_mte,
6262 gen_helper_sve_ldffdd_le_zsu_mte, } },
6263 { { gen_helper_sve_ldffbds_zss_mte,
6264 gen_helper_sve_ldffhds_le_zss_mte,
6265 gen_helper_sve_ldffsds_le_zss_mte,
6266 NULL, },
6267 { gen_helper_sve_ldffbdu_zss_mte,
6268 gen_helper_sve_ldffhdu_le_zss_mte,
6269 gen_helper_sve_ldffsdu_le_zss_mte,
6270 gen_helper_sve_ldffdd_le_zss_mte, } },
6271 { { gen_helper_sve_ldffbds_zd_mte,
6272 gen_helper_sve_ldffhds_le_zd_mte,
6273 gen_helper_sve_ldffsds_le_zd_mte,
6274 NULL, },
6275 { gen_helper_sve_ldffbdu_zd_mte,
6276 gen_helper_sve_ldffhdu_le_zd_mte,
6277 gen_helper_sve_ldffsdu_le_zd_mte,
6278 gen_helper_sve_ldffdd_le_zd_mte, } } } },
6279 { /* Big-endian */
6280 { { { gen_helper_sve_ldbds_zsu_mte,
6281 gen_helper_sve_ldhds_be_zsu_mte,
6282 gen_helper_sve_ldsds_be_zsu_mte,
6283 NULL, },
6284 { gen_helper_sve_ldbdu_zsu_mte,
6285 gen_helper_sve_ldhdu_be_zsu_mte,
6286 gen_helper_sve_ldsdu_be_zsu_mte,
6287 gen_helper_sve_lddd_be_zsu_mte, } },
6288 { { gen_helper_sve_ldbds_zss_mte,
6289 gen_helper_sve_ldhds_be_zss_mte,
6290 gen_helper_sve_ldsds_be_zss_mte,
6291 NULL, },
6292 { gen_helper_sve_ldbdu_zss_mte,
6293 gen_helper_sve_ldhdu_be_zss_mte,
6294 gen_helper_sve_ldsdu_be_zss_mte,
6295 gen_helper_sve_lddd_be_zss_mte, } },
6296 { { gen_helper_sve_ldbds_zd_mte,
6297 gen_helper_sve_ldhds_be_zd_mte,
6298 gen_helper_sve_ldsds_be_zd_mte,
6299 NULL, },
6300 { gen_helper_sve_ldbdu_zd_mte,
6301 gen_helper_sve_ldhdu_be_zd_mte,
6302 gen_helper_sve_ldsdu_be_zd_mte,
6303 gen_helper_sve_lddd_be_zd_mte, } } },
6304
6305 /* First-fault */
6306 { { { gen_helper_sve_ldffbds_zsu_mte,
6307 gen_helper_sve_ldffhds_be_zsu_mte,
6308 gen_helper_sve_ldffsds_be_zsu_mte,
6309 NULL, },
6310 { gen_helper_sve_ldffbdu_zsu_mte,
6311 gen_helper_sve_ldffhdu_be_zsu_mte,
6312 gen_helper_sve_ldffsdu_be_zsu_mte,
6313 gen_helper_sve_ldffdd_be_zsu_mte, } },
6314 { { gen_helper_sve_ldffbds_zss_mte,
6315 gen_helper_sve_ldffhds_be_zss_mte,
6316 gen_helper_sve_ldffsds_be_zss_mte,
6317 NULL, },
6318 { gen_helper_sve_ldffbdu_zss_mte,
6319 gen_helper_sve_ldffhdu_be_zss_mte,
6320 gen_helper_sve_ldffsdu_be_zss_mte,
6321 gen_helper_sve_ldffdd_be_zss_mte, } },
6322 { { gen_helper_sve_ldffbds_zd_mte,
6323 gen_helper_sve_ldffhds_be_zd_mte,
6324 gen_helper_sve_ldffsds_be_zd_mte,
6325 NULL, },
6326 { gen_helper_sve_ldffbdu_zd_mte,
6327 gen_helper_sve_ldffhdu_be_zd_mte,
6328 gen_helper_sve_ldffsdu_be_zd_mte,
6329 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
6330};
6331
3a7be554 6332static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
6333{
6334 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6335 bool be = s->be_data == MO_BE;
6336 bool mte = s->mte_active[0];
673e9fa6
RH
6337
6338 if (!sve_access_check(s)) {
6339 return true;
6340 }
6341
6342 switch (a->esz) {
6343 case MO_32:
d28d12f0 6344 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6345 break;
6346 case MO_64:
d28d12f0 6347 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6348 break;
6349 }
6350 assert(fn != NULL);
6351
6352 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6353 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
6354 return true;
6355}
6356
3a7be554 6357static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
6358{
6359 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6360 bool be = s->be_data == MO_BE;
6361 bool mte = s->mte_active[0];
673e9fa6
RH
6362
6363 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
6364 return false;
6365 }
6366 if (!sve_access_check(s)) {
6367 return true;
6368 }
6369
6370 switch (a->esz) {
6371 case MO_32:
d28d12f0 6372 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
6373 break;
6374 case MO_64:
d28d12f0 6375 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
6376 break;
6377 }
6378 assert(fn != NULL);
6379
6380 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
6381 * by loading the immediate into the scalar parameter.
6382 */
2ccdf94f
RH
6383 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6384 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
6385 return true;
6386}
6387
cf327449
SL
6388static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
6389{
b17ab470
RH
6390 gen_helper_gvec_mem_scatter *fn = NULL;
6391 bool be = s->be_data == MO_BE;
6392 bool mte = s->mte_active[0];
6393
6394 if (a->esz < a->msz + !a->u) {
6395 return false;
6396 }
cf327449
SL
6397 if (!dc_isar_feature(aa64_sve2, s)) {
6398 return false;
6399 }
b17ab470
RH
6400 if (!sve_access_check(s)) {
6401 return true;
6402 }
6403
6404 switch (a->esz) {
6405 case MO_32:
6406 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
6407 break;
6408 case MO_64:
6409 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
6410 break;
6411 }
6412 assert(fn != NULL);
6413
6414 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6415 cpu_reg(s, a->rm), a->msz, false, fn);
6416 return true;
cf327449
SL
6417}
6418
d28d12f0
RH
6419/* Indexed by [mte][be][xs][msz]. */
6420static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
6421 { /* MTE Inactive */
6422 { /* Little-endian */
6423 { gen_helper_sve_stbs_zsu,
6424 gen_helper_sve_sths_le_zsu,
6425 gen_helper_sve_stss_le_zsu, },
6426 { gen_helper_sve_stbs_zss,
6427 gen_helper_sve_sths_le_zss,
6428 gen_helper_sve_stss_le_zss, } },
6429 { /* Big-endian */
6430 { gen_helper_sve_stbs_zsu,
6431 gen_helper_sve_sths_be_zsu,
6432 gen_helper_sve_stss_be_zsu, },
6433 { gen_helper_sve_stbs_zss,
6434 gen_helper_sve_sths_be_zss,
6435 gen_helper_sve_stss_be_zss, } } },
6436 { /* MTE Active */
6437 { /* Little-endian */
6438 { gen_helper_sve_stbs_zsu_mte,
6439 gen_helper_sve_sths_le_zsu_mte,
6440 gen_helper_sve_stss_le_zsu_mte, },
6441 { gen_helper_sve_stbs_zss_mte,
6442 gen_helper_sve_sths_le_zss_mte,
6443 gen_helper_sve_stss_le_zss_mte, } },
6444 { /* Big-endian */
6445 { gen_helper_sve_stbs_zsu_mte,
6446 gen_helper_sve_sths_be_zsu_mte,
6447 gen_helper_sve_stss_be_zsu_mte, },
6448 { gen_helper_sve_stbs_zss_mte,
6449 gen_helper_sve_sths_be_zss_mte,
6450 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
6451};
6452
6453/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
6454static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
6455 { /* MTE Inactive */
6456 { /* Little-endian */
6457 { gen_helper_sve_stbd_zsu,
6458 gen_helper_sve_sthd_le_zsu,
6459 gen_helper_sve_stsd_le_zsu,
6460 gen_helper_sve_stdd_le_zsu, },
6461 { gen_helper_sve_stbd_zss,
6462 gen_helper_sve_sthd_le_zss,
6463 gen_helper_sve_stsd_le_zss,
6464 gen_helper_sve_stdd_le_zss, },
6465 { gen_helper_sve_stbd_zd,
6466 gen_helper_sve_sthd_le_zd,
6467 gen_helper_sve_stsd_le_zd,
6468 gen_helper_sve_stdd_le_zd, } },
6469 { /* Big-endian */
6470 { gen_helper_sve_stbd_zsu,
6471 gen_helper_sve_sthd_be_zsu,
6472 gen_helper_sve_stsd_be_zsu,
6473 gen_helper_sve_stdd_be_zsu, },
6474 { gen_helper_sve_stbd_zss,
6475 gen_helper_sve_sthd_be_zss,
6476 gen_helper_sve_stsd_be_zss,
6477 gen_helper_sve_stdd_be_zss, },
6478 { gen_helper_sve_stbd_zd,
6479 gen_helper_sve_sthd_be_zd,
6480 gen_helper_sve_stsd_be_zd,
6481 gen_helper_sve_stdd_be_zd, } } },
6482 { /* MTE Inactive */
6483 { /* Little-endian */
6484 { gen_helper_sve_stbd_zsu_mte,
6485 gen_helper_sve_sthd_le_zsu_mte,
6486 gen_helper_sve_stsd_le_zsu_mte,
6487 gen_helper_sve_stdd_le_zsu_mte, },
6488 { gen_helper_sve_stbd_zss_mte,
6489 gen_helper_sve_sthd_le_zss_mte,
6490 gen_helper_sve_stsd_le_zss_mte,
6491 gen_helper_sve_stdd_le_zss_mte, },
6492 { gen_helper_sve_stbd_zd_mte,
6493 gen_helper_sve_sthd_le_zd_mte,
6494 gen_helper_sve_stsd_le_zd_mte,
6495 gen_helper_sve_stdd_le_zd_mte, } },
6496 { /* Big-endian */
6497 { gen_helper_sve_stbd_zsu_mte,
6498 gen_helper_sve_sthd_be_zsu_mte,
6499 gen_helper_sve_stsd_be_zsu_mte,
6500 gen_helper_sve_stdd_be_zsu_mte, },
6501 { gen_helper_sve_stbd_zss_mte,
6502 gen_helper_sve_sthd_be_zss_mte,
6503 gen_helper_sve_stsd_be_zss_mte,
6504 gen_helper_sve_stdd_be_zss_mte, },
6505 { gen_helper_sve_stbd_zd_mte,
6506 gen_helper_sve_sthd_be_zd_mte,
6507 gen_helper_sve_stsd_be_zd_mte,
6508 gen_helper_sve_stdd_be_zd_mte, } } },
408ecde9
RH
6509};
6510
3a7be554 6511static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 6512{
f6dbf62a 6513 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
6514 bool be = s->be_data == MO_BE;
6515 bool mte = s->mte_active[0];
f6dbf62a
RH
6516
6517 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6518 return false;
6519 }
6520 if (!sve_access_check(s)) {
6521 return true;
6522 }
6523 switch (a->esz) {
6524 case MO_32:
d28d12f0 6525 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
6526 break;
6527 case MO_64:
d28d12f0 6528 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
6529 break;
6530 default:
6531 g_assert_not_reached();
6532 }
6533 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6534 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
6535 return true;
6536}
dec6cf6b 6537
3a7be554 6538static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
6539{
6540 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6541 bool be = s->be_data == MO_BE;
6542 bool mte = s->mte_active[0];
408ecde9
RH
6543
6544 if (a->esz < a->msz) {
6545 return false;
6546 }
6547 if (!sve_access_check(s)) {
6548 return true;
6549 }
6550
6551 switch (a->esz) {
6552 case MO_32:
d28d12f0 6553 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
6554 break;
6555 case MO_64:
d28d12f0 6556 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
6557 break;
6558 }
6559 assert(fn != NULL);
6560
6561 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6562 * by loading the immediate into the scalar parameter.
6563 */
2ccdf94f
RH
6564 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6565 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
6566 return true;
6567}
6568
6ebca45f
SL
6569static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
6570{
b17ab470
RH
6571 gen_helper_gvec_mem_scatter *fn;
6572 bool be = s->be_data == MO_BE;
6573 bool mte = s->mte_active[0];
6574
6575 if (a->esz < a->msz) {
6576 return false;
6577 }
6ebca45f
SL
6578 if (!dc_isar_feature(aa64_sve2, s)) {
6579 return false;
6580 }
b17ab470
RH
6581 if (!sve_access_check(s)) {
6582 return true;
6583 }
6584
6585 switch (a->esz) {
6586 case MO_32:
6587 fn = scatter_store_fn32[mte][be][0][a->msz];
6588 break;
6589 case MO_64:
6590 fn = scatter_store_fn64[mte][be][2][a->msz];
6591 break;
6592 default:
6593 g_assert_not_reached();
6594 }
6595
6596 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6597 cpu_reg(s, a->rm), a->msz, true, fn);
6598 return true;
6ebca45f
SL
6599}
6600
dec6cf6b
RH
6601/*
6602 * Prefetches
6603 */
6604
3a7be554 6605static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
6606{
6607 /* Prefetch is a nop within QEMU. */
2f95a3b0 6608 (void)sve_access_check(s);
dec6cf6b
RH
6609 return true;
6610}
6611
3a7be554 6612static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
6613{
6614 if (a->rm == 31) {
6615 return false;
6616 }
6617 /* Prefetch is a nop within QEMU. */
2f95a3b0 6618 (void)sve_access_check(s);
dec6cf6b
RH
6619 return true;
6620}
a2103582
RH
6621
6622/*
6623 * Move Prefix
6624 *
6625 * TODO: The implementation so far could handle predicated merging movprfx.
6626 * The helper functions as written take an extra source register to
6627 * use in the operation, but the result is only written when predication
6628 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
6629 * to allow the final write back to the destination to be unconditional.
6630 * For predicated zeroing movprfx, we need to rearrange the helpers to
6631 * allow the final write back to zero inactives.
6632 *
6633 * In the meantime, just emit the moves.
6634 */
6635
3a7be554 6636static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
a2103582
RH
6637{
6638 return do_mov_z(s, a->rd, a->rn);
6639}
6640
3a7be554 6641static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
6642{
6643 if (sve_access_check(s)) {
6644 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
6645 }
6646 return true;
6647}
6648
3a7be554 6649static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
a2103582 6650{
60245996 6651 return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
a2103582 6652}
5dad1ba5
RH
6653
6654/*
6655 * SVE2 Integer Multiply - Unpredicated
6656 */
6657
6658static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
6659{
6660 if (!dc_isar_feature(aa64_sve2, s)) {
6661 return false;
6662 }
6663 if (sve_access_check(s)) {
6664 gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
6665 }
6666 return true;
6667}
6668
6669static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a,
6670 gen_helper_gvec_3 *fn)
6671{
913a8a00 6672 if (!dc_isar_feature(aa64_sve2, s)) {
5dad1ba5
RH
6673 return false;
6674 }
84a272f5 6675 return gen_gvec_ool_arg_zzz(s, fn, a, 0);
5dad1ba5
RH
6676}
6677
6678static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6679{
6680 static gen_helper_gvec_3 * const fns[4] = {
6681 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
6682 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
6683 };
6684 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6685}
6686
6687static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6688{
6689 static gen_helper_gvec_3 * const fns[4] = {
6690 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
6691 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
6692 };
6693 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6694}
6695
6696static bool trans_PMUL_zzz(DisasContext *s, arg_rrr_esz *a)
6697{
6698 return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b);
6699}
d4b1e59d 6700
169d7c58
RH
6701static bool trans_SQDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6702{
6703 static gen_helper_gvec_3 * const fns[4] = {
6704 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
6705 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
6706 };
6707 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6708}
6709
6710static bool trans_SQRDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6711{
6712 static gen_helper_gvec_3 * const fns[4] = {
6713 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
6714 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
6715 };
6716 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6717}
6718
d4b1e59d
RH
6719/*
6720 * SVE2 Integer - Predicated
6721 */
6722
6723static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
6724 gen_helper_gvec_4 *fn)
6725{
6726 if (!dc_isar_feature(aa64_sve2, s)) {
6727 return false;
6728 }
6729 return do_zpzz_ool(s, a, fn);
6730}
6731
6732static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6733{
6734 static gen_helper_gvec_4 * const fns[3] = {
6735 gen_helper_sve2_sadalp_zpzz_h,
6736 gen_helper_sve2_sadalp_zpzz_s,
6737 gen_helper_sve2_sadalp_zpzz_d,
6738 };
6739 if (a->esz == 0) {
6740 return false;
6741 }
6742 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6743}
6744
6745static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6746{
6747 static gen_helper_gvec_4 * const fns[3] = {
6748 gen_helper_sve2_uadalp_zpzz_h,
6749 gen_helper_sve2_uadalp_zpzz_s,
6750 gen_helper_sve2_uadalp_zpzz_d,
6751 };
6752 if (a->esz == 0) {
6753 return false;
6754 }
6755 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6756}
db366da8
RH
6757
6758/*
6759 * SVE2 integer unary operations (predicated)
6760 */
6761
6762static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a,
6763 gen_helper_gvec_3 *fn)
6764{
6765 if (!dc_isar_feature(aa64_sve2, s)) {
6766 return false;
6767 }
6768 return do_zpz_ool(s, a, fn);
6769}
6770
6771static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a)
6772{
6773 if (a->esz != 2) {
6774 return false;
6775 }
6776 return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s);
6777}
6778
6779static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a)
6780{
6781 if (a->esz != 2) {
6782 return false;
6783 }
6784 return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s);
6785}
6786
6787static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a)
6788{
6789 static gen_helper_gvec_3 * const fns[4] = {
6790 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
6791 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
6792 };
6793 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6794}
6795
6796static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a)
6797{
6798 static gen_helper_gvec_3 * const fns[4] = {
6799 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
6800 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
6801 };
6802 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6803}
45d9503d
RH
6804
6805#define DO_SVE2_ZPZZ(NAME, name) \
6806static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
6807{ \
6808 static gen_helper_gvec_4 * const fns[4] = { \
6809 gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
6810 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
6811 }; \
6812 return do_sve2_zpzz_ool(s, a, fns[a->esz]); \
6813}
6814
6815DO_SVE2_ZPZZ(SQSHL, sqshl)
6816DO_SVE2_ZPZZ(SQRSHL, sqrshl)
6817DO_SVE2_ZPZZ(SRSHL, srshl)
6818
6819DO_SVE2_ZPZZ(UQSHL, uqshl)
6820DO_SVE2_ZPZZ(UQRSHL, uqrshl)
6821DO_SVE2_ZPZZ(URSHL, urshl)
a47dc220
RH
6822
6823DO_SVE2_ZPZZ(SHADD, shadd)
6824DO_SVE2_ZPZZ(SRHADD, srhadd)
6825DO_SVE2_ZPZZ(SHSUB, shsub)
6826
6827DO_SVE2_ZPZZ(UHADD, uhadd)
6828DO_SVE2_ZPZZ(URHADD, urhadd)
6829DO_SVE2_ZPZZ(UHSUB, uhsub)
8597dc8b
RH
6830
6831DO_SVE2_ZPZZ(ADDP, addp)
6832DO_SVE2_ZPZZ(SMAXP, smaxp)
6833DO_SVE2_ZPZZ(UMAXP, umaxp)
6834DO_SVE2_ZPZZ(SMINP, sminp)
6835DO_SVE2_ZPZZ(UMINP, uminp)
4f07fbeb
RH
6836
6837DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
6838DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
6839DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
6840DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
6841DO_SVE2_ZPZZ(SUQADD, suqadd)
6842DO_SVE2_ZPZZ(USQADD, usqadd)
0ce1dda8
RH
6843
6844/*
6845 * SVE2 Widening Integer Arithmetic
6846 */
6847
6848static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a,
6849 gen_helper_gvec_3 *fn, int data)
6850{
6851 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6852 return false;
6853 }
6854 if (sve_access_check(s)) {
6855 unsigned vsz = vec_full_reg_size(s);
6856 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
6857 vec_full_reg_offset(s, a->rn),
6858 vec_full_reg_offset(s, a->rm),
6859 vsz, vsz, data, fn);
6860 }
6861 return true;
6862}
6863
6864#define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \
6865static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
6866{ \
6867 static gen_helper_gvec_3 * const fns[4] = { \
6868 NULL, gen_helper_sve2_##name##_h, \
6869 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
6870 }; \
6871 return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1); \
6872}
6873
6874DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false)
6875DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false)
6876DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false)
6877
6878DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false)
6879DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false)
6880DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false)
6881
6882DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true)
6883DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true)
6884DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true)
6885
6886DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true)
6887DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true)
6888DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true)
daec426b
RH
6889
6890DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true)
6891DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true)
6892DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false)
81fccf09 6893
69ccc099
RH
6894DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false)
6895DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true)
6896
6897DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false)
6898DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true)
6899
6900DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false)
6901DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true)
6902
2df3ca55
RH
6903static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1)
6904{
6905 static gen_helper_gvec_3 * const fns[4] = {
6906 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6907 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6908 };
6909 return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1);
6910}
6911
6912static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a)
6913{
6914 return do_eor_tb(s, a, false);
6915}
6916
6917static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a)
6918{
6919 return do_eor_tb(s, a, true);
6920}
6921
e3a56131
RH
6922static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6923{
6924 static gen_helper_gvec_3 * const fns[4] = {
6925 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6926 NULL, gen_helper_sve2_pmull_d,
6927 };
6928 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
6929 return false;
6930 }
6931 return do_sve2_zzw_ool(s, a, fns[a->esz], sel);
6932}
6933
6934static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a)
6935{
6936 return do_trans_pmull(s, a, false);
6937}
6938
6939static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a)
6940{
6941 return do_trans_pmull(s, a, true);
6942}
6943
81fccf09
RH
6944#define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \
6945static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
6946{ \
6947 static gen_helper_gvec_3 * const fns[4] = { \
6948 NULL, gen_helper_sve2_##name##_h, \
6949 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
6950 }; \
6951 return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2); \
6952}
6953
6954DO_SVE2_ZZZ_WTB(SADDWB, saddw, false)
6955DO_SVE2_ZZZ_WTB(SADDWT, saddw, true)
6956DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false)
6957DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true)
6958
6959DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false)
6960DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true)
6961DO_SVE2_ZZZ_WTB(USUBWB, usubw, false)
6962DO_SVE2_ZZZ_WTB(USUBWT, usubw, true)
4269fef1
RH
6963
6964static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6965{
6966 int top = imm & 1;
6967 int shl = imm >> 1;
6968 int halfbits = 4 << vece;
6969
6970 if (top) {
6971 if (shl == halfbits) {
6972 TCGv_vec t = tcg_temp_new_vec_matching(d);
6973 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6974 tcg_gen_and_vec(vece, d, n, t);
6975 tcg_temp_free_vec(t);
6976 } else {
6977 tcg_gen_sari_vec(vece, d, n, halfbits);
6978 tcg_gen_shli_vec(vece, d, d, shl);
6979 }
6980 } else {
6981 tcg_gen_shli_vec(vece, d, n, halfbits);
6982 tcg_gen_sari_vec(vece, d, d, halfbits - shl);
6983 }
6984}
6985
6986static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
6987{
6988 int halfbits = 4 << vece;
6989 int top = imm & 1;
6990 int shl = (imm >> 1);
6991 int shift;
6992 uint64_t mask;
6993
6994 mask = MAKE_64BIT_MASK(0, halfbits);
6995 mask <<= shl;
6996 mask = dup_const(vece, mask);
6997
6998 shift = shl - top * halfbits;
6999 if (shift < 0) {
7000 tcg_gen_shri_i64(d, n, -shift);
7001 } else {
7002 tcg_gen_shli_i64(d, n, shift);
7003 }
7004 tcg_gen_andi_i64(d, d, mask);
7005}
7006
7007static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7008{
7009 gen_ushll_i64(MO_16, d, n, imm);
7010}
7011
7012static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7013{
7014 gen_ushll_i64(MO_32, d, n, imm);
7015}
7016
7017static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7018{
7019 gen_ushll_i64(MO_64, d, n, imm);
7020}
7021
7022static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
7023{
7024 int halfbits = 4 << vece;
7025 int top = imm & 1;
7026 int shl = imm >> 1;
7027
7028 if (top) {
7029 if (shl == halfbits) {
7030 TCGv_vec t = tcg_temp_new_vec_matching(d);
7031 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
7032 tcg_gen_and_vec(vece, d, n, t);
7033 tcg_temp_free_vec(t);
7034 } else {
7035 tcg_gen_shri_vec(vece, d, n, halfbits);
7036 tcg_gen_shli_vec(vece, d, d, shl);
7037 }
7038 } else {
7039 if (shl == 0) {
7040 TCGv_vec t = tcg_temp_new_vec_matching(d);
7041 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7042 tcg_gen_and_vec(vece, d, n, t);
7043 tcg_temp_free_vec(t);
7044 } else {
7045 tcg_gen_shli_vec(vece, d, n, halfbits);
7046 tcg_gen_shri_vec(vece, d, d, halfbits - shl);
7047 }
7048 }
7049}
7050
7051static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
7052 bool sel, bool uns)
7053{
7054 static const TCGOpcode sshll_list[] = {
7055 INDEX_op_shli_vec, INDEX_op_sari_vec, 0
7056 };
7057 static const TCGOpcode ushll_list[] = {
7058 INDEX_op_shli_vec, INDEX_op_shri_vec, 0
7059 };
7060 static const GVecGen2i ops[2][3] = {
7061 { { .fniv = gen_sshll_vec,
7062 .opt_opc = sshll_list,
7063 .fno = gen_helper_sve2_sshll_h,
7064 .vece = MO_16 },
7065 { .fniv = gen_sshll_vec,
7066 .opt_opc = sshll_list,
7067 .fno = gen_helper_sve2_sshll_s,
7068 .vece = MO_32 },
7069 { .fniv = gen_sshll_vec,
7070 .opt_opc = sshll_list,
7071 .fno = gen_helper_sve2_sshll_d,
7072 .vece = MO_64 } },
7073 { { .fni8 = gen_ushll16_i64,
7074 .fniv = gen_ushll_vec,
7075 .opt_opc = ushll_list,
7076 .fno = gen_helper_sve2_ushll_h,
7077 .vece = MO_16 },
7078 { .fni8 = gen_ushll32_i64,
7079 .fniv = gen_ushll_vec,
7080 .opt_opc = ushll_list,
7081 .fno = gen_helper_sve2_ushll_s,
7082 .vece = MO_32 },
7083 { .fni8 = gen_ushll64_i64,
7084 .fniv = gen_ushll_vec,
7085 .opt_opc = ushll_list,
7086 .fno = gen_helper_sve2_ushll_d,
7087 .vece = MO_64 } },
7088 };
7089
7090 if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
7091 return false;
7092 }
7093 if (sve_access_check(s)) {
7094 unsigned vsz = vec_full_reg_size(s);
7095 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
7096 vec_full_reg_offset(s, a->rn),
7097 vsz, vsz, (a->imm << 1) | sel,
7098 &ops[uns][a->esz]);
7099 }
7100 return true;
7101}
7102
7103static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
7104{
7105 return do_sve2_shll_tb(s, a, false, false);
7106}
7107
7108static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
7109{
7110 return do_sve2_shll_tb(s, a, true, false);
7111}
7112
7113static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
7114{
7115 return do_sve2_shll_tb(s, a, false, true);
7116}
7117
7118static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
7119{
7120 return do_sve2_shll_tb(s, a, true, true);
7121}
cb9c33b8
RH
7122
/*
 * SVE2 bit-permute instructions (BEXT/BDEP/BGRP).  These are gated on
 * the SVE2 bit-permute extension rather than plain SVE2, and expand
 * out-of-line only, one helper per element size.
 */
static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
        gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}

static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
        gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}

static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
        gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}
ed4a6387
RH
7158
/*
 * Complex integer add (CADD) and saturating complex add (SQCADD).
 * 'sq' selects the saturating form; 'rot' (90 vs 270 degree rotation)
 * is passed through as the helper's data argument.
 */
static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot)
{
    static gen_helper_gvec_3 * const fns[2][4] = {
        { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
          gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d },
        { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
          gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d },
    };
    return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot);
}

static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, false);
}

static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, true);
}

static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, false);
}

static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, true);
}
38650638
RH
7189
/*
 * Expand an SVE2 4-operand (rd, rn, rm, ra) instruction with an
 * out-of-line helper.  A NULL fn marks an unsupported element size
 * and fails the decode; 'data' is forwarded in the simd descriptor.
 */
static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
                             gen_helper_gvec_4 *fn, int data)
{
    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
    }
    return true;
}
7201
/*
 * SABAL/UABAL (absolute difference and accumulate long).
 * 'uns' selects the unsigned form; 'sel' selects the top (odd)
 * half-width elements and is passed as the helper data.  No byte
 * form exists, hence the NULL for MO_8.
 */
static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
{
    static gen_helper_gvec_4 * const fns[2][4] = {
        { NULL,                    gen_helper_sve2_sabal_h,
          gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d },
        { NULL,                    gen_helper_sve2_uabal_h,
          gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d },
    };
    return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel);
}

static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, false);
}

static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, true);
}

static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, false);
}

static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, true);
}
b8295dfb
RH
7232
/*
 * ADCLB/ADCLT (add with carry long, bottom/top).
 * 'sel' selects the top (odd) half-width elements.
 */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return do_sve2_zzzz_ool(s, a, fns[a->esz & 1], (a->esz & 2) | sel);
}

static bool trans_ADCLB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_adcl(s, a, false);
}

static bool trans_ADCLT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_adcl(s, a, true);
}
a7e3a90e
RH
7255
/*
 * Expand an SVE2 unpredicated (rd, rn, imm) operation via a shared
 * gvec 2-operand-with-immediate expander.  a->esz < 0 marks an
 * invalid element-size decode.
 */
static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
        unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
        fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
    }
    return true;
}

/* Shift-right (rounding) and accumulate, signed/unsigned. */
static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ssra);
}

static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_usra);
}

static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_srsra);
}

static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ursra);
}
fc12b46a
RH
7289
/* Shift-right/left and insert, sharing the AdvSIMD gvec expanders. */
static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sri);
}

static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sli);
}
289a1797
RH
7299
/*
 * Expand an SVE2 unpredicated 3-operand (rd, rn, rm) operation via a
 * shared gvec expander, gated on SVE2.
 */
static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}

/* Absolute difference and accumulate, signed/unsigned. */
static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_saba);
}

static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
}
5ff2838d
RH
7320
/*
 * Expand an SVE2 saturating extract-narrow operation.  The ops table
 * is indexed by the *narrow* element size, so only MO_8..MO_32 are
 * valid; the immediate must have decoded as zero (no shift).
 */
static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
                                   const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
        !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}
7336
/* Vector opcodes required by the SQXTN{B,T} inline expansions. */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTNB: signed saturate each wide element to the signed range of
 * the half-width type, then keep only the low half of each lane
 * (the even narrow elements); the odd halves are zeroed by the mask.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);   /* INT_MIN of the half width */
    int64_t max = -min - 1;                  /* INT_MAX of the half width */

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7376
/*
 * SQXTNT: as SQXTNB, but the saturated result is shifted into the
 * high half of each lane and merged with the existing low halves of
 * the destination (bitsel keeps d where the low-half mask t is set,
 * takes the shifted n elsewhere) -- hence load_dest below.
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7416
/* Vector opcodes required by the UQXTN{B,T} inline expansions. */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

/*
 * UQXTNB: unsigned saturate to the half-width maximum.  A single
 * umin both clamps and clears the high half of each lane, so no
 * separate masking step is needed.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7450
/*
 * UQXTNT: as UQXTNB, but the clamped value is shifted into the high
 * half of each lane and merged with the destination's low halves,
 * so the destination must be loaded first (load_dest).
 */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7485
/* Vector opcodes required by the SQXTUN{B,T} inline expansions. */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTUNB: signed-to-unsigned saturating narrow.  Clamp at zero from
 * below (smax), then at the unsigned half-width maximum from above
 * (umin), which also clears the high half of each lane.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7521
/*
 * SQXTUNT: as SQXTUNB, but the result is shifted into the high half
 * of each lane and merged with the destination's low halves
 * (load_dest).
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7558
/*
 * Expand an SVE2 shift-right-and-narrow operation.  The ops table is
 * indexed by the *narrow* element size (MO_8..MO_32).  The decoder
 * guarantees 1 <= imm <= wide element width, asserted here.
 */
static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
                               const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
7574
/*
 * SHRNB expansion on 64-bit integer lanes: shift the wide elements
 * right, then keep only the low half of each lane (the even narrow
 * elements); the odd halves become zero.
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

/* Per-element-size thunks matching the GVecGen2i .fni8 signature. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

/* Same operation on host vector lanes. */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7610
/* SHRNB: shift right narrow, writing the even (bottom) elements. */
static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnb16_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnb32_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnb64_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7633
/*
 * SHRNT expansion on 64-bit integer lanes: position the shifted
 * result in the high half of each lane (left shift by
 * halfbits - shr), then merge with the destination's low halves.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/* MO_64: one narrow element per i64 lane -- deposit into the high 32 bits. */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

/* Same operation on host vector lanes, merged via bitsel. */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7672
/* SHRNT: shift right narrow, writing the odd (top) elements. */
static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnt16_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnt32_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnt64_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7698
/* Rounding shift-right narrow: out-of-line helpers only. */
static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnb_h },
        { .fno = gen_helper_sve2_rshrnb_s },
        { .fno = gen_helper_sve2_rshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnt_h },
        { .fno = gen_helper_sve2_rshrnt_s },
        { .fno = gen_helper_sve2_rshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7718
/*
 * SQSHRUNB: signed arithmetic shift right, then saturate to the
 * unsigned half-width range (clamp at 0 from below, at the unsigned
 * max from above -- the umin also clears the high half of each lane).
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7754
/*
 * SQSHRUNT: as SQSHRUNB, but the result is shifted into the high
 * half of each lane and merged with the destination's low halves
 * (load_dest).
 */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7796
/* Rounding variants: out-of-line helpers only. */
static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunb_h },
        { .fno = gen_helper_sve2_sqrshrunb_s },
        { .fno = gen_helper_sve2_sqrshrunb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunt_h },
        { .fno = gen_helper_sve2_sqrshrunt_s },
        { .fno = gen_helper_sve2_sqrshrunt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7816
743bb147
RH
/*
 * SQSHRNB: signed arithmetic shift right, saturate to the signed
 * half-width range, then mask to the low half of each lane.
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);  /* half-width INT_MAX */
    int64_t min = -max - 1;                          /* half-width INT_MIN */

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7856
/*
 * SQSHRNT: as SQSHRNB, but the result is shifted into the high half
 * of each lane and merged with the destination's low halves
 * (load_dest).
 */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7901
/* Rounding variants: out-of-line helpers only. */
static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnb_h },
        { .fno = gen_helper_sve2_sqrshrnb_s },
        { .fno = gen_helper_sve2_sqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnt_h },
        { .fno = gen_helper_sve2_sqrshrnt_s },
        { .fno = gen_helper_sve2_sqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7921
c13418da
RH
/*
 * UQSHRNB: logical shift right, then saturate to the unsigned
 * half-width maximum (the umin also clears the high half of each
 * lane).
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7955
/*
 * UQSHRNT: as UQSHRNB, but the result is shifted into the high half
 * of each lane and merged with the destination's low halves
 * (load_dest).
 */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7994
/* Rounding variants: out-of-line helpers only. */
static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnb_h },
        { .fno = gen_helper_sve2_uqrshrnb_s },
        { .fno = gen_helper_sve2_uqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnt_h },
        { .fno = gen_helper_sve2_uqrshrnt_s },
        { .fno = gen_helper_sve2_uqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
b87dbeeb 8014
40d5ea50
SL
/*
 * SVE2 narrowing high-half add/sub (with/without rounding).
 * No byte form: the table is indexed by the narrow element size and
 * a->esz == 0 falls through to the NULL slot, failing the decode in
 * do_sve2_zzz_ool.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)                 \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    return do_sve2_zzz_ool(s, a, fns[a->esz]);                            \
}

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
c3cd6766 8034
e0ae6ec3
SL
/*
 * SVE2 character-match instructions: predicated compares that also
 * set the flags, reusing the SVE do_ppzz_flags expander with an
 * SVE2 gate.  Only byte and halfword forms exist.
 */
static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                               gen_helper_gvec_flags_4 *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_ppzz_flags(s, a, fn);
}

#define DO_SVE2_PPZZ_MATCH(NAME, name)                                      \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_flags_4 * const fns[4] = {                       \
        gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h,   \
        NULL,                            NULL                               \
    };                                                                      \
    return do_sve2_ppzz_flags(s, a, fns[a->esz]);                           \
}

DO_SVE2_PPZZ_MATCH(MATCH, match)
DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
8056
7d47ac94
SL
8057static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
8058{
8059 static gen_helper_gvec_4 * const fns[2] = {
8060 gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
8061 };
8062 if (a->esz < 2) {
8063 return false;
8064 }
8065 return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
8066}
8067
8068static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
8069{
8070 if (a->esz != 0) {
8071 return false;
8072 }
8073 return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
8074}
8075
b87dbeeb
SL
/*
 * SVE2 predicated floating-point pairwise operations: reuse the SVE
 * do_zpzz_fp expander with an SVE2 gate.  No byte form (fp has no
 * 8-bit element size), hence the NULL slot.
 */
static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                            gen_helper_gvec_4_ptr *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzz_fp(s, a, fn);
}

#define DO_SVE2_ZPZZ_FP(NAME, name)                                         \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_4_ptr * const fns[4] = {                         \
        NULL,                            gen_helper_sve2_##name##_zpzz_h,   \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d    \
    };                                                                      \
    return do_sve2_zpzz_fp(s, a, fns[a->esz]);                              \
}

DO_SVE2_ZPZZ_FP(FADDP, faddp)
DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
DO_SVE2_ZPZZ_FP(FMINP, fminp)
bfc9307e
RH
8100
8101/*
8102 * SVE Integer Multiply-Add (unpredicated)
8103 */
8104
4f26756b
SL
8105static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
8106{
8107 gen_helper_gvec_4_ptr *fn;
8108
8109 switch (a->esz) {
8110 case MO_32:
8111 if (!dc_isar_feature(aa64_sve_f32mm, s)) {
8112 return false;
8113 }
8114 fn = gen_helper_fmmla_s;
8115 break;
8116 case MO_64:
8117 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
8118 return false;
8119 }
8120 fn = gen_helper_fmmla_d;
8121 break;
8122 default:
8123 return false;
8124 }
8125
8126 if (sve_access_check(s)) {
8127 unsigned vsz = vec_full_reg_size(s);
8128 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8129 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8130 vec_full_reg_offset(s, a->rn),
8131 vec_full_reg_offset(s, a->rm),
8132 vec_full_reg_offset(s, a->ra),
8133 status, vsz, vsz, 0, fn);
8134 tcg_temp_free_ptr(status);
8135 }
8136 return true;
8137}
8138
bfc9307e
RH
/*
 * SQDMLAL/SQDMLSL (saturating doubling multiply add/sub long).
 * sel1/sel2 choose the top (odd) half-width elements of the first
 * and second source respectively, packed into the helper data.
 * No byte form, hence the NULL for MO_8.
 */
static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
        gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

static bool do_sqdmlsl_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
        gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

static bool trans_SQDMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, false);
}

static bool trans_SQDMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, true, true);
}

/* BT form: bottom of the first source, top of the second. */
static bool trans_SQDMLALBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, true);
}

static bool trans_SQDMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, false);
}

static bool trans_SQDMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, true, true);
}

static bool trans_SQDMLSLBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, true);
}
ab3ddf31
RH
8188
/* SQRDMLAH/SQRDMLSH: rounding doubling multiply add/sub, all sizes. */
static bool trans_SQRDMLAH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
        gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}

static bool trans_SQRDMLSH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
        gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}
45a32e80
RH
8206
/*
 * S/U MLAL/MLSL (multiply add/sub long).  In each group 'sel' selects
 * the top (odd) half-width source elements and is passed as helper
 * data.  No byte form, hence the NULL for MO_8.
 */
static bool do_smlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_smlal_zzzw_h,
        gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_SMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, false);
}

static bool trans_SMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, true);
}

static bool do_umlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_umlal_zzzw_h,
        gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_UMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, false);
}

static bool trans_UMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, true);
}

static bool do_smlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_smlsl_zzzw_h,
        gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_SMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, false);
}

static bool trans_SMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, true);
}

static bool do_umlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_umlsl_zzzw_h,
        gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_UMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, false);
}

static bool trans_UMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, true);
}
d782d3ca
RH
8282
8283static bool trans_CMLA_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8284{
8285 static gen_helper_gvec_4 * const fns[] = {
8286 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
8287 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
8288 };
8289
8290 if (!dc_isar_feature(aa64_sve2, s)) {
8291 return false;
8292 }
8293 if (sve_access_check(s)) {
8294 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8295 }
8296 return true;
8297}
8298
21068f39
RH
8299static bool trans_CDOT_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8300{
8301 if (!dc_isar_feature(aa64_sve2, s) || a->esz < MO_32) {
8302 return false;
8303 }
8304 if (sve_access_check(s)) {
8305 gen_helper_gvec_4 *fn = (a->esz == MO_32
8306 ? gen_helper_sve2_cdot_zzzz_s
8307 : gen_helper_sve2_cdot_zzzz_d);
8308 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->rot);
8309 }
8310 return true;
8311}
8312
d782d3ca
RH
8313static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_SQRDCMLAH_zzzz *a)
8314{
8315 static gen_helper_gvec_4 * const fns[] = {
8316 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
8317 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
8318 };
8319
8320 if (!dc_isar_feature(aa64_sve2, s)) {
8321 return false;
8322 }
8323 if (sve_access_check(s)) {
8324 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8325 }
8326 return true;
8327}
6a98cb2a
RH
8328
8329static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a)
8330{
8331 if (a->esz != 2 || !dc_isar_feature(aa64_sve_i8mm, s)) {
8332 return false;
8333 }
8334 if (sve_access_check(s)) {
8335 unsigned vsz = vec_full_reg_size(s);
8336 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
8337 vec_full_reg_offset(s, a->rn),
8338 vec_full_reg_offset(s, a->rm),
8339 vec_full_reg_offset(s, a->ra),
8340 vsz, vsz, 0, gen_helper_gvec_usdot_b);
8341 }
8342 return true;
8343}
b2bcd1be 8344
0ea3cdbf
RH
8345TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
8346 gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
3cc7a88e
RH
8347
8348static bool do_aese(DisasContext *s, arg_rrr_esz *a, bool decrypt)
8349{
8350 if (!dc_isar_feature(aa64_sve2_aes, s)) {
8351 return false;
8352 }
84a272f5 8353 return gen_gvec_ool_arg_zzz(s, gen_helper_crypto_aese, a, decrypt);
3cc7a88e
RH
8354}
8355
8356static bool trans_AESE(DisasContext *s, arg_rrr_esz *a)
8357{
8358 return do_aese(s, a, false);
8359}
8360
8361static bool trans_AESD(DisasContext *s, arg_rrr_esz *a)
8362{
8363 return do_aese(s, a, true);
8364}
8365
8366static bool do_sm4(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
8367{
8368 if (!dc_isar_feature(aa64_sve2_sm4, s)) {
8369 return false;
8370 }
84a272f5 8371 return gen_gvec_ool_arg_zzz(s, fn, a, 0);
3cc7a88e
RH
8372}
8373
8374static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a)
8375{
8376 return do_sm4(s, a, gen_helper_crypto_sm4e);
8377}
3358eb3f
RH
8378
8379static bool trans_SM4EKEY(DisasContext *s, arg_rrr_esz *a)
8380{
8381 return do_sm4(s, a, gen_helper_crypto_sm4ekey);
8382}
8383
8384static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
8385{
8386 if (!dc_isar_feature(aa64_sve2_sha3, s)) {
8387 return false;
8388 }
8389 if (sve_access_check(s)) {
8390 gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
8391 }
8392 return true;
8393}
5c1b7226
RH
8394
8395static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
8396{
8397 if (!dc_isar_feature(aa64_sve2, s)) {
8398 return false;
8399 }
8400 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
8401}
8402
d29b17ca
RH
8403static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
8404{
8405 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8406 return false;
8407 }
8408 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
8409}
8410
5c1b7226
RH
8411static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
8412{
8413 if (!dc_isar_feature(aa64_sve2, s)) {
8414 return false;
8415 }
8416 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
8417}
83c2523f
SL
8418
8419static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
8420{
8421 if (!dc_isar_feature(aa64_sve2, s)) {
8422 return false;
8423 }
8424 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
8425}
8426
8427static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
8428{
8429 if (!dc_isar_feature(aa64_sve2, s)) {
8430 return false;
8431 }
8432 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
8433}
95365277
SL
8434
8435static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
8436{
8437 if (!dc_isar_feature(aa64_sve2, s)) {
8438 return false;
8439 }
8440 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
8441}
8442
8443static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
8444{
8445 if (!dc_isar_feature(aa64_sve2, s)) {
8446 return false;
8447 }
8448 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
8449}
631be02e
SL
8450
8451static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
8452{
8453 static gen_helper_gvec_3_ptr * const fns[] = {
8454 NULL, gen_helper_flogb_h,
8455 gen_helper_flogb_s, gen_helper_flogb_d
8456 };
8457
8458 if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
8459 return false;
8460 }
8461 if (sve_access_check(s)) {
8462 TCGv_ptr status =
8463 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8464 unsigned vsz = vec_full_reg_size(s);
8465
8466 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
8467 vec_full_reg_offset(s, a->rn),
8468 pred_full_reg_offset(s, a->pg),
8469 status, vsz, vsz, 0, fns[a->esz]);
8470 tcg_temp_free_ptr(status);
8471 }
8472 return true;
8473}
50d102bd
SL
8474
8475static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
8476{
8477 if (!dc_isar_feature(aa64_sve2, s)) {
8478 return false;
8479 }
8480 if (sve_access_check(s)) {
8481 unsigned vsz = vec_full_reg_size(s);
8482 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8483 vec_full_reg_offset(s, a->rn),
8484 vec_full_reg_offset(s, a->rm),
8485 vec_full_reg_offset(s, a->ra),
8486 cpu_env, vsz, vsz, (sel << 1) | sub,
8487 gen_helper_sve2_fmlal_zzzw_s);
8488 }
8489 return true;
8490}
8491
8492static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8493{
8494 return do_FMLAL_zzzw(s, a, false, false);
8495}
8496
8497static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8498{
8499 return do_FMLAL_zzzw(s, a, false, true);
8500}
8501
8502static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8503{
8504 return do_FMLAL_zzzw(s, a, true, false);
8505}
8506
8507static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8508{
8509 return do_FMLAL_zzzw(s, a, true, true);
8510}
8511
8512static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
8513{
8514 if (!dc_isar_feature(aa64_sve2, s)) {
8515 return false;
8516 }
8517 if (sve_access_check(s)) {
8518 unsigned vsz = vec_full_reg_size(s);
8519 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8520 vec_full_reg_offset(s, a->rn),
8521 vec_full_reg_offset(s, a->rm),
8522 vec_full_reg_offset(s, a->ra),
8523 cpu_env, vsz, vsz,
8524 (a->index << 2) | (sel << 1) | sub,
8525 gen_helper_sve2_fmlal_zzxw_s);
8526 }
8527 return true;
8528}
8529
8530static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8531{
8532 return do_FMLAL_zzxw(s, a, false, false);
8533}
8534
8535static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8536{
8537 return do_FMLAL_zzxw(s, a, false, true);
8538}
8539
8540static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8541{
8542 return do_FMLAL_zzxw(s, a, true, false);
8543}
8544
8545static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8546{
8547 return do_FMLAL_zzxw(s, a, true, true);
8548}
2323c5ff
RH
8549
8550static bool do_i8mm_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
8551 gen_helper_gvec_4 *fn, int data)
8552{
8553 if (!dc_isar_feature(aa64_sve_i8mm, s)) {
8554 return false;
8555 }
8556 if (sve_access_check(s)) {
8557 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
8558 }
8559 return true;
8560}
8561
8562static bool trans_SMMLA(DisasContext *s, arg_rrrr_esz *a)
8563{
8564 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_smmla_b, 0);
8565}
8566
8567static bool trans_USMMLA(DisasContext *s, arg_rrrr_esz *a)
8568{
8569 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_usmmla_b, 0);
8570}
8571
8572static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a)
8573{
8574 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0);
8575}
cb8657f7
RH
8576
8577static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
8578{
8579 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8580 return false;
8581 }
8582 if (sve_access_check(s)) {
8583 gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot,
8584 a->rd, a->rn, a->rm, a->ra, 0);
8585 }
8586 return true;
8587}
83914478
RH
8588
8589static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
8590{
8591 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8592 return false;
8593 }
8594 if (sve_access_check(s)) {
8595 gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx,
8596 a->rd, a->rn, a->rm, a->ra, a->index);
8597 }
8598 return true;
8599}
81266a1f
RH
8600
8601static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
8602{
8603 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8604 return false;
8605 }
8606 if (sve_access_check(s)) {
8607 gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla,
8608 a->rd, a->rn, a->rm, a->ra, 0);
8609 }
8610 return true;
8611}
5693887f
RH
8612
8613static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8614{
8615 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8616 return false;
8617 }
8618 if (sve_access_check(s)) {
8619 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8620 unsigned vsz = vec_full_reg_size(s);
8621
8622 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8623 vec_full_reg_offset(s, a->rn),
8624 vec_full_reg_offset(s, a->rm),
8625 vec_full_reg_offset(s, a->ra),
8626 status, vsz, vsz, sel,
8627 gen_helper_gvec_bfmlal);
8628 tcg_temp_free_ptr(status);
8629 }
8630 return true;
8631}
8632
8633static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8634{
8635 return do_BFMLAL_zzzw(s, a, false);
8636}
8637
8638static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8639{
8640 return do_BFMLAL_zzzw(s, a, true);
8641}
458d0ab6
RH
8642
8643static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
8644{
8645 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8646 return false;
8647 }
8648 if (sve_access_check(s)) {
8649 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8650 unsigned vsz = vec_full_reg_size(s);
8651
8652 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8653 vec_full_reg_offset(s, a->rn),
8654 vec_full_reg_offset(s, a->rm),
8655 vec_full_reg_offset(s, a->ra),
8656 status, vsz, vsz, (a->index << 1) | sel,
8657 gen_helper_gvec_bfmlal_idx);
8658 tcg_temp_free_ptr(status);
8659 }
8660 return true;
8661}
8662
8663static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8664{
8665 return do_BFMLAL_zzxw(s, a, false);
8666}
8667
8668static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8669{
8670 return do_BFMLAL_zzxw(s, a, true);
8671}