]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Use TRANS_FEAT for do_sve2_zzz_data
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    /* The element size is encoded as the position of the most
     * significant set bit of tsz, after discarding the low imm3 bits.
     * clz32(0) == 32, so an all-zero tsz yields -1 (unallocated).
     */
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}
61
/* Decode the right-shift amount: (2 * esize) - tsz:imm3. */
static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}
66
/* See e.g. LSL (immediate, predicated).
 * Decode the left-shift amount: tsz:imm3 - esize.
 */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}
72
/* The SH bit is in bit 8.  Extract the low 8 and shift.
 * Sign-extended form, for signed immediates.
 */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}
78
/* As expand_imm_sh8s, but the low 8 bits are treated as unsigned. */
static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
83
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    /* dtype values for unsigned loads where esz == msz. */
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}
110
/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    /* sve_len is the vector length in bytes; predicates are 1/8 of that. */
    return s->sve_len >> 3;
}
116
516e246a
RH
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure: 8 bytes for the smallest
 * registers, otherwise a multiple of 16 bytes.
 *
 * Any operation which uses this size may assume that the bits above
 * pred_full_reg_size are zero, and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    return size <= 8 ? 8 : QEMU_ALIGN_UP(size, 16);
}
133
/* Return the predicate register size rounded up for tcg gvec use. */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
138
/* Invoke an out-of-line helper on 2 Zregs.
 * Returns false for a NULL helper (unallocated encoding);
 * otherwise true, emitting code only if the SVE access check passes.
 */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}
154
/* Invoke an out-of-line helper on 3 Zregs.
 * Returns false for a NULL helper (unallocated encoding);
 * otherwise true, emitting code only if the SVE access check passes.
 */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}
171
84a272f5
RH
/* As gen_gvec_ool_zzz, taking the registers from decoded arguments. */
static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}
177
/* Invoke an out-of-line helper on 4 Zregs.
 * Returns false for a NULL helper (unallocated encoding);
 * otherwise true, emitting code only if the SVE access check passes.
 */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}
195
cab79ac9
RH
/* As gen_gvec_ool_zzzz, taking the registers from decoded arguments. */
static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}
201
e82d3536
RH
/* As gen_gvec_ool_zzzz for indexed operations: pass the index as data. */
static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}
207
96a461f7
RH
/* Invoke an out-of-line helper on 2 Zregs and a predicate.
 * Note: no access check here; callers perform sve_access_check.
 */
static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, data, fn);
}
218
36cbb7a8
RH
/* Invoke an out-of-line helper on 3 Zregs and a predicate.
 * Note: no access check here; callers perform sve_access_check.
 */
static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, data, fn);
}
f7d79c41 230
/* Invoke a vector expander on two Zregs. */
static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
                           int esz, int rd, int rn)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn), vsz, vsz);
}
239
/* Invoke a vector expander on three Zregs. */
static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vsz, vsz);
}
249
911cdc6d
RH
/* Invoke a vector expander on four Zregs. */
static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                             int esz, int rd, int rn, int rm, int ra)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm),
            vec_full_reg_offset(s, ra), vsz, vsz);
}
260
39eea561
RH
/* Invoke a vector move on two Zregs.  Always "succeeds" from the
 * decoder's point of view; code is emitted only if access is allowed.
 */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
    }
    return true;
}
269
d9d78dcc
RH
/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}
276
/* Invoke a vector expander on three Pregs.
 * Predicate operations always use the rounded gvec size at MO_64;
 * the bits above pred_full_reg_size are known zero (see size_for_gvec).
 */
static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    unsigned psz = pred_gvec_reg_size(s);
    gvec_fn(MO_64, pred_full_reg_offset(s, rd),
            pred_full_reg_offset(s, rn),
            pred_full_reg_offset(s, rm), psz, psz);
}
286
/* Invoke a vector move on two Pregs.  Always "succeeds"; code is
 * emitted only if the SVE access check passes.
 */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}
297
9e18d7a6
RH
/* Set the cpu flags as per a return from an SVE helper.
 * The helper packs N into bit 31, Z into bit 1, C into bit 0; V is 0.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
306
/* Subroutines computing the ARM PredTest pseudofunction. */

/* Single-word form: D and G are one 64-bit predicate word each. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
316
/* General form of PredTest: @words predicate words at env offsets
 * @dofs (data) and @gofs (governing predicate).
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
333
028e2a7b
RH
/* For each element size, the bits within a predicate word that are active.
 * Indexed by MO_8 .. MO_64: one predicate bit per byte of the element.
 */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
339
39eea561
RH
340/*
341 *** SVE Logical - Unpredicated Group
342 */
343
28c4da31
RH
344static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
345{
346 if (sve_access_check(s)) {
347 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
348 }
349 return true;
350}
351
/* AND (vectors, unpredicated). */
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_and);
}

/* ORR (vectors, unpredicated). */
static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_or);
}

/* EOR (vectors, unpredicated). */
static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}

/* BIC (vectors, unpredicated): Zd = Zn & ~Zm. */
static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}
d1822297 371
e6eba6e5
RH
/* XAR, 8-bit elements packed in a 64-bit lane: xor then rotate right.
 * There is no 8-bit rotate primitive, so synthesize the rotate from a
 * masked pair of shifts applied to all eight bytes at once.
 */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);         /* high part of each rotated byte */
    tcg_gen_shli_i64(t, t, 8 - sh);     /* low part of each rotated byte */
    tcg_gen_andi_i64(d, d, mask);       /* strip bits shifted in from neighbors */
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}
385
/* XAR, 16-bit elements packed in a 64-bit lane: as gen_xar8_i64,
 * but with four 16-bit elements per word.
 */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}
399
/* XAR, one 32-bit element: native rotate is available. */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

/* XAR, one 64-bit element: native rotate is available. */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

/* XAR, host vector form, any element size with rotli support. */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
418
/* Expand XAR (xor and rotate right) as a gvec operation.
 * Shared with AdvSIMD, hence the non-static linkage and the
 * relaxed shift range (see assertion below).
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;         /* reduce a full-width rotate to 0 */

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
461
/* XAR (SVE2): requires the SVE2 feature and a valid tsz encoding. */
static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}
475
911cdc6d
RH
476static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
477{
478 if (!dc_isar_feature(aa64_sve2, s)) {
479 return false;
480 }
481 if (sve_access_check(s)) {
482 gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
483 }
484 return true;
485}
486
/* EOR3: three-way exclusive or, Zd = Zn ^ Zm ^ Zk. */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

/* Gvec expansion for EOR3; bitwise, so MO_64 regardless of esz. */
static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_eor3);
}
517
/* BCAX: bit clear and exclusive or, Zd = Zn ^ (Zm & ~Zk). */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

/* Gvec expansion for BCAX; bitwise, so MO_64 regardless of esz. */
static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bcax);
}
548
/* BSL: bitwise select, Zd = (Zd & Zk) | (Zm & ~Zk) with Zd == Zn. */
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl);
}
560
/* BSL1N: bitwise select with first input inverted,
 * Z[dn] = (~n & k) | (m & ~k).
 * Note the i64/vec expanders are free to clobber n and m.
 */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

/* Gvec expansion for BSL1N; bitwise, so MO_64 regardless of esz. */
static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl1n);
}
598
/* BSL2N: bitwise select with second input inverted. */
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

/* Gvec expansion for BSL2N; bitwise, so MO_64 regardless of esz. */
static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl2n);
}
645
/* NBSL: inverted bitwise select, Z[dn] = ~((n & k) | (m & ~k)). */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

/* Gvec expansion for NBSL; bitwise, so MO_64 regardless of esz. */
static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_nbsl);
}
677
fea98f9c
RH
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_add);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}

/* SQADD: signed saturating add. */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}

/* SQSUB: signed saturating subtract. */
static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}

/* UQADD: unsigned saturating add. */
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}

/* UQSUB: unsigned saturating subtract. */
static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}
711
f97cfd59
RH
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Expand a predicated Zd = f(Zn, Zm) via an out-of-line helper.
 * Returns false for a NULL helper (unallocated element size).
 */
static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
    }
    return true;
}
726
a2103582
RH
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    gen_gvec_ool_zzp(s, fns[esz], rd, rn, rm, pg, 0);
}
738
/*
 * Expand trans_<NAME>_zpzz for all four element sizes of a predicated
 * integer binary operation, dispatching to the per-size helpers.
 */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)
771
/* SDIV: only 32- and 64-bit element sizes are architected. */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
779
/* UDIV: only 32- and 64-bit element sizes are architected. */
static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
787
/* SEL: predicated select between Zn (active) and Zm (inactive). */
static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ
afac6d04
RH
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

/* Expand a predicated Zd = f(Zn) via an out-of-line helper.
 * Returns false for a NULL helper (unallocated element size).
 */
static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
    }
    return true;
}
812
/*
 * Expand trans_<NAME> for all four element sizes of a predicated
 * integer unary operation, dispatching to the per-size helpers.
 */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
830
/* FABS: no 8-bit floating-point element size. */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* FNEG: no 8-bit floating-point element size. */
static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTB: destination must be wider than a byte. */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* UXTB: destination must be wider than a byte. */
static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTH: destination must be wider than a halfword. */
static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* UXTH: destination must be wider than a halfword. */
static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTW: only a doubleword destination is valid. */
static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

/* UXTW: only a doubleword destination is valid. */
static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
906
047cec97
RH
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Reduce Zn under predicate Pg to a scalar, written to Vd (Zd[0]).
 * Returns false for a NULL helper (unallocated element size).
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    /* write_fp_dreg zeroes the high bits of the destination. */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
942
/*
 * Expand trans_<NAME> for all four element sizes of a predicated
 * reduction, dispatching to the per-size helpers.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_reduc * const fns[4] = {                 \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_vpz_ool(s, a, fns[a->esz]);                           \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV: no 64-bit element size (the accumulator is 64-bit). */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
973
ccd841c3
RH
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
    }
    return true;
}
995
/* Expand a predicated shift-by-immediate via an out-of-line helper;
 * the immediate is passed through the simd data field.
 */
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
    }
    return true;
}
1004
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}
1020
static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
1038
static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
1056
static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
1074
a5421b54
SL
1075static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1076{
1077 static gen_helper_gvec_3 * const fns[4] = {
1078 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
1079 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
1080 };
1081 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1082 return false;
1083 }
1084 return do_zpzi_ool(s, a, fns[a->esz]);
1085}
1086
1087static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1088{
1089 static gen_helper_gvec_3 * const fns[4] = {
1090 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
1091 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
1092 };
1093 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1094 return false;
1095 }
1096 return do_zpzi_ool(s, a, fns[a->esz]);
1097}
1098
1099static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a)
1100{
1101 static gen_helper_gvec_3 * const fns[4] = {
1102 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
1103 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
1104 };
1105 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1106 return false;
1107 }
1108 return do_zpzi_ool(s, a, fns[a->esz]);
1109}
1110
1111static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a)
1112{
1113 static gen_helper_gvec_3 * const fns[4] = {
1114 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
1115 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
1116 };
1117 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1118 return false;
1119 }
1120 return do_zpzi_ool(s, a, fns[a->esz]);
1121}
1122
1123static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a)
1124{
1125 static gen_helper_gvec_3 * const fns[4] = {
1126 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
1127 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
1128 };
1129 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1130 return false;
1131 }
1132 return do_zpzi_ool(s, a, fns[a->esz]);
1133}
1134
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/*
 * Expand a predicated shift-by-wide-elements trans function, where
 * each element of Zn is shifted by the corresponding doubleword
 * element of Zm.  Only byte/half/word element sizes have helpers
 * (fns[3] does not exist), so MO_64 is rejected below.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1157
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

/*
 * Expand ASR/LSR/LSL (immediate, unpredicated) inline via a gvec
 * shift-by-immediate expander.  @asr distinguishes the arithmetic
 * right shift, whose out-of-range immediate clamps rather than zeroes.
 * Always returns true (decode succeeded) once the esz check passes.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                /* Logical shift by >= width: the result is all zeros. */
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
1188
/* ASR (immediate, unpredicated): inline gvec arithmetic shift right. */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

/* LSR (immediate, unpredicated): inline gvec logical shift right. */
static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

/* LSL (immediate, unpredicated): inline gvec shift left. */
static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
1203
/*
 * Expand an unpredicated shift-by-wide-elements insn via TRANS_FEAT.
 * The NULL entry means there is no doubleword form; the expander is
 * expected to reject a NULL function pointer (and a->esz < 0 indexes
 * are avoided by the decode of the tszimm field).
 */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {                \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                      \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1217
/*
 *** SVE Integer Multiply-Add Group
 */

/*
 * Expand a predicated multiply-add style operation via an out-of-line
 * helper taking five operands: Zd, Za, Zn, Zm and governing predicate Pg.
 * Always returns true: the access check itself handles the trap case.
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand a trans function for MLA/MLS with one helper per element size. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)               \
{                                                                         \
    static gen_helper_gvec_5 * const fns[4] = {                           \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,             \
    };                                                                    \
    return do_zpzzz_ool(s, a, fns[a->esz]);                               \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
1251
/*
 *** SVE Index Generation Group
 */

/*
 * Expand INDEX: Zd.<esz>[i] = start + i * incr.
 * The doubleword form uses the i64 helper directly; the narrower forms
 * truncate start/incr to 32 bits first because those helpers take
 * TCGv_i32 operands.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* Low 32 bits of start/incr are sufficient for b/h/s elements. */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
}
1285
3a7be554 1286static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
9a56c9c3
RH
1287{
1288 if (sve_access_check(s)) {
b0c3aece
RH
1289 TCGv_i64 start = tcg_constant_i64(a->imm1);
1290 TCGv_i64 incr = tcg_constant_i64(a->imm2);
9a56c9c3 1291 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1292 }
1293 return true;
1294}
1295
3a7be554 1296static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
9a56c9c3
RH
1297{
1298 if (sve_access_check(s)) {
b0c3aece 1299 TCGv_i64 start = tcg_constant_i64(a->imm);
9a56c9c3
RH
1300 TCGv_i64 incr = cpu_reg(s, a->rm);
1301 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1302 }
1303 return true;
1304}
1305
3a7be554 1306static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
9a56c9c3
RH
1307{
1308 if (sve_access_check(s)) {
1309 TCGv_i64 start = cpu_reg(s, a->rn);
b0c3aece 1310 TCGv_i64 incr = tcg_constant_i64(a->imm);
9a56c9c3 1311 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1312 }
1313 return true;
1314}
1315
3a7be554 1316static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
9a56c9c3
RH
1317{
1318 if (sve_access_check(s)) {
1319 TCGv_i64 start = cpu_reg(s, a->rn);
1320 TCGv_i64 incr = cpu_reg(s, a->rm);
1321 do_index(s, a->esz, a->rd, start, incr);
1322 }
1323 return true;
1324}
1325
96f922cc
RH
1326/*
1327 *** SVE Stack Allocation Group
1328 */
1329
3a7be554 1330static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1331{
5de56742
AC
1332 if (sve_access_check(s)) {
1333 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1334 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1335 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1336 }
96f922cc
RH
1337 return true;
1338}
1339
3a7be554 1340static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1341{
5de56742
AC
1342 if (sve_access_check(s)) {
1343 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1344 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1345 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1346 }
96f922cc
RH
1347 return true;
1348}
1349
3a7be554 1350static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1351{
5de56742
AC
1352 if (sve_access_check(s)) {
1353 TCGv_i64 reg = cpu_reg(s, a->rd);
1354 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1355 }
96f922cc
RH
1356 return true;
1357}
1358
/*
 *** SVE Compute Vector Address Group
 */

/* Expand ADR via an out-of-line helper; a->imm is the shift amount. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

/* ADR, packed 32-bit offsets. */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

/* ADR, packed 64-bit offsets. */
static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

/* ADR, sign-extended 32-bit offsets. */
static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

/* ADR, zero-extended 32-bit offsets. */
static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1387
/*
 *** SVE Integer Misc - Unpredicated Group
 */

/* FEXPA: no byte form exists (NULL entry for MO_8). */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)

/* FTSSEL: no byte form exists (NULL entry for MO_8). */
static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
a1f233f2 1404
/*
 *** SVE Predicate Logical Operations Group
 */

/*
 * Expand a predicate logical operation, optionally (a->s) also
 * generating NZCV flags from the result under the governing predicate.
 * Without flags, a plain gvec expansion suffices.  With flags, the
 * 8-byte case is done entirely in i64 temps; larger sizes use the
 * gvec expansion followed by a PTEST-style pass over the result.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1464
/* Pd = (Pn & Pm) & Pg, on 64-bit words. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn & Pm) & Pg, on host vectors. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* AND (predicates); also reached from RDFFR_p via a frobbed AND. */
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        /* Without flag setting, several operand aliases simplify. */
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                /* All three sources equal: plain predicate move. */
                do_mov_p(s, a->rd, a->rn);
            } else {
                /* Pn == Pm: reduces to a single AND with Pg. */
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            /* Pg duplicates one source: one AND of the two sources. */
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
1505
/* Pd = (Pn &~ Pm) & Pg, on 64-bit words. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn &~ Pm) & Pg, on host vectors. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* BIC (predicates). */
static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Without flags and with Pg == Pn, this reduces to one ANDC. */
    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}
1536
/* Pd = (Pn ^ Pm) & Pg, on 64-bit words. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn ^ Pm) & Pg, on host vectors. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* EOR (predicates). */
static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1560
/* SEL (predicates): Pd = Pg ? Pn : Pm.  There is no flag-setting form. */
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s) {
        /* The setflags bit is an invalid encoding for SEL. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}
1575
/* Pd = (Pn | Pm) & Pg, on 64-bit words. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn | Pm) & Pg, on host vectors. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* ORR (predicates). */
static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* All three sources equal and no flags: plain predicate move. */
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1603
/* Pd = (Pn | ~Pm) & Pg, on 64-bit words. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn | ~Pm) & Pg, on host vectors. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* ORN (predicates). */
static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1627
/* Pd = ~(Pn | Pm) & Pg, on 64-bit words. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Pd = ~(Pn | Pm) & Pg, on host vectors. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

/* NOR (predicates). */
static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1651
/* Pd = ~(Pn & Pm) & Pg, on 64-bit words. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Pd = ~(Pn & Pm) & Pg, on host vectors. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

/* NAND (predicates). */
static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1675
/*
 *** SVE Predicate Misc Group
 */

/* PTEST: set NZCV from Pn tested under governing predicate Pg. */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Predicate fits one 64-bit word: test it inline. */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1703
/* See the ARM pseudocode DecodePredCount.
 * Returns the number of active elements for the given predicate-count
 * pattern, or 0 when the pattern cannot be satisfied by @fullsz.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5: reserved patterns select no elements */
        return 0;
    }
    /* Fixed VLn patterns apply only if the vector is long enough. */
    return elements >= bound ? bound : 0;
}
1741
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Partial final word: mask off bits beyond the active set. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate is one word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            /* Uniform words: splat via gvec (zero-extends to maxsz). */
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Store the full words, then the partial word, then zeros. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1821
/* PTRUE / PTRUES: initialize Pd per the count pattern. */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

/* SETFFR: set all elements of the FFR. */
static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

/* PFALSE: clear all elements of Pd. */
static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

/* RDFFR (predicated) / RDFFRS: Pd = FFR & Pg, optionally setting flags. */
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR (unpredicated): Pd = FFR. */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

/* WRFFR: FFR = Pn. */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1860
/*
 * Expand PFIRST/PNEXT via an out-of-line helper that both updates Pd
 * and returns the flag result.  The predicate size and element size
 * are packed into the PREDDESC descriptor word.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    /* The helper's return value feeds the NZCV update. */
    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
1889
/* PFIRST: set the first active element of Pd, updating flags. */
static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

/* PNEXT: advance to the next active element of Pd, updating flags. */
static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1899
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtraction can only underflow: clamp to the lower bound. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Addition can only overflow: clamp to the upper bound. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1928
/* Similarly with 64-bit values.  The second operand is known positive;
 * unsigned forms use a movcond against the unmodified operands, while
 * signed forms detect overflow via the standard xor bit tricks.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: if reg < val, saturate at 0. */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: if the sum wrapped (sum < reg), saturate
               at UINT64_MAX (-1). */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1974
/* Similarly with a vector and a scalar operand: saturating add/subtract
 * of @val to each element of Zn, into Zd, via out-of-line helpers.
 * For b/h/s element sizes a subtraction is implemented by negating the
 * scalar and using the (signed-scalar) add helper; only MO_64 has a
 * dedicated unsigned-subtract helper.
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            /* Signed 64-bit subtract: negate and use the add helper. */
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
2058
3a7be554 2059static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
2060{
2061 if (sve_access_check(s)) {
2062 unsigned fullsz = vec_full_reg_size(s);
2063 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2064 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2065 }
2066 return true;
2067}
2068
3a7be554 2069static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2070{
2071 if (sve_access_check(s)) {
2072 unsigned fullsz = vec_full_reg_size(s);
2073 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2074 int inc = numelem * a->imm * (a->d ? -1 : 1);
2075 TCGv_i64 reg = cpu_reg(s, a->rd);
2076
2077 tcg_gen_addi_i64(reg, reg, inc);
2078 }
2079 return true;
2080}
2081
3a7be554 2082static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2083{
2084 if (!sve_access_check(s)) {
2085 return true;
2086 }
2087
2088 unsigned fullsz = vec_full_reg_size(s);
2089 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2090 int inc = numelem * a->imm;
2091 TCGv_i64 reg = cpu_reg(s, a->rd);
2092
2093 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
2094 if (inc == 0) {
2095 if (a->u) {
2096 tcg_gen_ext32u_i64(reg, reg);
2097 } else {
2098 tcg_gen_ext32s_i64(reg, reg);
2099 }
2100 } else {
d681f125 2101 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2102 }
2103 return true;
2104}
2105
3a7be554 2106static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2107{
2108 if (!sve_access_check(s)) {
2109 return true;
2110 }
2111
2112 unsigned fullsz = vec_full_reg_size(s);
2113 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2114 int inc = numelem * a->imm;
2115 TCGv_i64 reg = cpu_reg(s, a->rd);
2116
2117 if (inc != 0) {
d681f125 2118 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2119 }
2120 return true;
2121}
2122
/* INCB/DECB etc (vector): Zd = Zn +/- count * imm per element.
 * The byte form does not exist (esz == 0 is invalid).
 */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        /* Zero increment: plain vector move (does its own access check). */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/* SQINCB/UQINCB etc (vector): saturating Zd = Zn +/- count * imm. */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        /* Zero increment: plain vector move (does its own access check). */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
e1fa1164
RH
2167/*
2168 *** SVE Bitwise Immediate Group
2169 */
2170
2171static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2172{
2173 uint64_t imm;
2174 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2175 extract32(a->dbm, 0, 6),
2176 extract32(a->dbm, 6, 6))) {
2177 return false;
2178 }
2179 if (sve_access_check(s)) {
2180 unsigned vsz = vec_full_reg_size(s);
2181 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
2182 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
2183 }
2184 return true;
2185}
2186
3a7be554 2187static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2188{
2189 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
2190}
2191
3a7be554 2192static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2193{
2194 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
2195}
2196
3a7be554 2197static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2198{
2199 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
2200}
2201
3a7be554 2202static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2203{
2204 uint64_t imm;
2205 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2206 extract32(a->dbm, 0, 6),
2207 extract32(a->dbm, 6, 6))) {
2208 return false;
2209 }
2210 if (sve_access_check(s)) {
2211 do_dupi_z(s, a->rd, imm);
2212 }
2213 return true;
2214}
2215
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar): VAL is merged into the
 * elements of ZD selected by predicate PG, with ZN supplying the
 * inactive elements.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    /* One out-of-line helper per element size (b/h/s/d). */
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    /* Pass host pointers into the zreg/preg register file. */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
}
2247
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        /* There is no byte-sized FP element. */
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    /*
     * NOTE(review): insn bit 13 presumably selects the shifted
     * (LSL #8) immediate form, which cannot apply to byte elements
     * -- confirm against the decode tables.
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    /* Zeroing copy: one out-of-line helper per element size. */
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    /* Same insn bit 13 restriction as the merging form. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2291
/*
 *** SVE Permute Extract Group
 */

/* EXT: extract a full vector from the byte concatenation Zn:Zm,
 * starting at byte offset IMM.
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    /* An out-of-range index selects Zn unchanged (offset 0). */
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Low part of Zd comes from Zn[n_ofs..], high part from Zm. */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper. */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2325
75114792
SL
2326static bool trans_EXT(DisasContext *s, arg_EXT *a)
2327{
2328 return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
2329}
2330
2331static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
2332{
2333 if (!dc_isar_feature(aa64_sve2, s)) {
2334 return false;
2335 }
2336 return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
2337}
2338
30562ab7
RH
2339/*
2340 *** SVE Permute - Unpredicated Group
2341 */
2342
3a7be554 2343static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2344{
2345 if (sve_access_check(s)) {
2346 unsigned vsz = vec_full_reg_size(s);
2347 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2348 vsz, vsz, cpu_reg_sp(s, a->rn));
2349 }
2350 return true;
2351}
2352
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        /* No set bit in the low 5 bits: invalid size encoding. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* imm encodes both element size and index: the element size is
         * the position of the lowest set bit, the index is the bits
         * above it.
         */
        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            /* Index within the vector: broadcast element Zn[index]. */
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2379
/* Common expansion for INSR: invoke the per-element-size out-of-line
 * helper with VAL as the inserted value.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    /* Pass host pointers into the zreg register file. */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
}

/* INSR (SIMD&FP scalar): the value comes from element 0 of Vm. */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* INSR (scalar): the value comes from Xm. */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
2419
/* REV (vector): reverse the order of all elements. */
static gen_helper_gvec_2 * const rev_fns[4] = {
    gen_helper_sve_rev_b, gen_helper_sve_rev_h,
    gen_helper_sve_rev_s, gen_helper_sve_rev_d
};
TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)

/* TBL: table lookup with a single index vector. */
static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
    gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
    gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
};
TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)

/* SVE2 TBL: two-register table, sourcing Zn and Zn+1 (mod 32). */
static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
    gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
    gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
};
TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
           a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)

/* SVE2 TBX: table-lookup variant; semantics implemented by the helpers. */
static gen_helper_gvec_3 * const tbx_fns[4] = {
    gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
    gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
};
TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)

/* SUNPK/UUNPK: widen the low or high half of Zn, signed or unsigned. */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    /* Indexed by [esz][u]; no unpack to byte elements. */
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* a->h selects the high half of the source as input. */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2466
/*
 *** SVE Permute - Predicates Group
 */

/* Expand a 3-operand predicate permute through an out-of-line helper.
 * HIGH_ODD selects the high-half/odd-element variant and is passed to
 * the helper in the descriptor DATA field.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    uint32_t desc = 0;

    /* Predicate sizes cannot use simd_desc; build a PREDDESC by hand. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));

    fn(t_d, t_n, t_m, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    return true;
}

/* As do_perm_pred3, for the 2-operand predicate permutes. */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes cannot use simd_desc; build a PREDDESC by hand. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(t_d, t_n, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}

/* ZIP1 (predicates): interleave low halves. */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

/* ZIP2 (predicates): interleave high halves. */
static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

/* UZP1 (predicates): concatenate even elements. */
static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

/* UZP2 (predicates): concatenate odd elements. */
static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

/* TRN1 (predicates): transpose even elements. */
static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

/* TRN2 (predicates): transpose odd elements. */
static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

/* REV (predicate): reverse all predicate elements. */
static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

/* PUNPKLO: unpack the low half of Pn. */
static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

/* PUNPKHI: unpack the high half of Pn. */
static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
2571
234b48e9
RH
2572/*
2573 *** SVE Permute - Interleaving Group
2574 */
2575
2576static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2577{
2578 static gen_helper_gvec_3 * const fns[4] = {
2579 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2580 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2581 };
2582
2583 if (sve_access_check(s)) {
2584 unsigned vsz = vec_full_reg_size(s);
2585 unsigned high_ofs = high ? vsz / 2 : 0;
2586 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2587 vec_full_reg_offset(s, a->rn) + high_ofs,
2588 vec_full_reg_offset(s, a->rm) + high_ofs,
2589 vsz, vsz, 0, fns[a->esz]);
2590 }
2591 return true;
2592}
2593
3a7be554 2594static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2595{
2596 return do_zip(s, a, false);
2597}
2598
3a7be554 2599static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2600{
2601 return do_zip(s, a, true);
2602}
2603
/* Quadword (128-bit element) ZIP, part of the F64MM extension. */
static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* High-half offset, aligned down to a whole pair of quadwords. */
        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, gen_helper_sve2_zip_q);
    }
    return true;
}

static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, false);
}

static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, true);
}

/* UZP: concatenate the even (UZP1) or odd (UZP2) elements; the element
 * selection is passed as the descriptor data (byte offset of the first
 * element taken).
 */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 0)
TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 16)

/* TRN: transpose even (TRN1) or odd (TRN2) elements, same data scheme. */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 0)
TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 0)
TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 16)

/*
 *** SVE Permute Vector - Predicated Group
 */

static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    /* COMPACT supports only word and doubleword element sizes. */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2671
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_p);
}

/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two size: wrap with a simple mask. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select 0 when the incremented offset reaches vsz. */
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Select the offset of the final element when LAST is negative. */
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}

/* Load an unsigned element of ESZ from BASE+OFS. */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}

/* Load an unsigned element of ESZ from RM[LAST]. */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering.  See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2774
/* Compute CLAST for a Zreg. */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Local temp: must stay valid across the branch below. */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    /* CLASTA uses the element after the last active one. */
    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        /* No active element: Zd takes the value of Zn. */
        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}

/* Compute CLAST for a scalar. */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    /* Keep the loaded element if an active element was found (cmp >= 0),
     * else retain the prior value REG_VAL.
     */
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
                        ele, reg_val);

    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2864
2865/* Compute CLAST for a Vreg. */
2866static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2867{
2868 if (sve_access_check(s)) {
2869 int esz = a->esz;
2870 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2871 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2872
2873 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2874 write_fp_dreg(s, a->rd, reg);
2875 tcg_temp_free_i64(reg);
2876 }
2877 return true;
2878}
2879
3a7be554 2880static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2881{
2882 return do_clast_fp(s, a, false);
2883}
2884
3a7be554 2885static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2886{
2887 return do_clast_fp(s, a, true);
2888}
2889
/* Compute CLAST for a Xreg. */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* The prior value of Xd is used when no element is active;
     * zero-extend it to the element width first.
     */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, true);
}
2929
2930/* Compute LAST for a scalar. */
2931static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2932 int pg, int rm, bool before)
2933{
2934 TCGv_i32 last = tcg_temp_new_i32();
2935 TCGv_i64 ret;
2936
2937 find_last_active(s, last, esz, pg);
2938 if (before) {
2939 wrap_last_active(s, last, esz);
2940 } else {
2941 incr_last_active(s, last, esz);
2942 }
2943
2944 ret = load_last_active(s, last, rm, esz);
2945 tcg_temp_free_i32(last);
2946 return ret;
2947}
2948
2949/* Compute LAST for a Vreg. */
2950static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2951{
2952 if (sve_access_check(s)) {
2953 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2954 write_fp_dreg(s, a->rd, val);
2955 tcg_temp_free_i64(val);
2956 }
2957 return true;
2958}
2959
3a7be554 2960static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2961{
2962 return do_last_fp(s, a, false);
2963}
2964
3a7be554 2965static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2966{
2967 return do_last_fp(s, a, true);
2968}
2969
2970/* Compute LAST for a Xreg. */
2971static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2972{
2973 if (sve_access_check(s)) {
2974 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2975 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2976 tcg_temp_free_i64(val);
2977 }
2978 return true;
2979}
2980
3a7be554 2981static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2982{
2983 return do_last_general(s, a, false);
2984}
2985
3a7be554 2986static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2987{
2988 return do_last_general(s, a, true);
2989}
2990
3a7be554 2991static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2992{
2993 if (sve_access_check(s)) {
2994 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2995 }
2996 return true;
2997}
2998
3a7be554 2999static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
3000{
3001 if (sve_access_check(s)) {
3002 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
3003 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
3004 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
3005 tcg_temp_free_i64(t);
3006 }
3007 return true;
3008}
3009
3a7be554 3010static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3011{
3012 static gen_helper_gvec_3 * const fns[4] = {
3013 NULL,
3014 gen_helper_sve_revb_h,
3015 gen_helper_sve_revb_s,
3016 gen_helper_sve_revb_d,
3017 };
3018 return do_zpz_ool(s, a, fns[a->esz]);
3019}
3020
3a7be554 3021static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3022{
3023 static gen_helper_gvec_3 * const fns[4] = {
3024 NULL,
3025 NULL,
3026 gen_helper_sve_revh_s,
3027 gen_helper_sve_revh_d,
3028 };
3029 return do_zpz_ool(s, a, fns[a->esz]);
3030}
3031
3a7be554 3032static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3033{
3034 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
3035}
3036
3a7be554 3037static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3038{
3039 static gen_helper_gvec_3 * const fns[4] = {
3040 gen_helper_sve_rbit_b,
3041 gen_helper_sve_rbit_h,
3042 gen_helper_sve_rbit_s,
3043 gen_helper_sve_rbit_d,
3044 };
3045 return do_zpz_ool(s, a, fns[a->esz]);
3046}
3047
3a7be554 3048static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
b48ff240
RH
3049{
3050 if (sve_access_check(s)) {
36cbb7a8 3051 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
dd701faf 3052 a->rd, a->rn, a->rm, a->pg, a->esz);
b48ff240
RH
3053 }
3054 return true;
3055}
3056
75114792
SL
3057static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
3058{
3059 if (!dc_isar_feature(aa64_sve2, s)) {
3060 return false;
3061 }
3062 if (sve_access_check(s)) {
3063 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
3064 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
3065 }
3066 return true;
3067}
3068
/*
 *** SVE Integer Compare - Vectors Group
 */

/* Expand a predicated vector-vs-vector compare.  The helper returns
 * the resulting flags word, which is folded into the emulated PSTATE
 * via do_pred_flags.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        /* Element size not supported for this compare. */
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

/* Vector-vs-vector compares: one helper per element size. */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_flags_4 * const fns[4] = { \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
    }; \
    return do_ppzz_flags(s, a, fns[a->esz]); \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/* Vector-vs-wide-element compares: no doubleword element form. */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_flags_4 * const fns[4] = { \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
        gen_helper_sve_##name##_ppzw_s, NULL \
    }; \
    return do_ppzz_flags(s, a, fns[a->esz]); \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW

/*
 *** SVE Integer Compare - Immediate Groups
 */

/* Expand a predicated vector-vs-immediate compare.  The immediate is
 * passed to the helper in the simd_desc data field.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

/* Vector-vs-immediate compares: one helper per element size. */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
{ \
    static gen_helper_gvec_flags_3 * const fns[4] = { \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
    }; \
    return do_ppzi_flags(s, a, fns[a->esz]); \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI

/*
 *** SVE Partition Break Group
 */

/* Expand a 3-operand BRK insn; FN is the plain form, FN_S the
 * flag-setting form used when a->s is set.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting form: fold the returned flags into PSTATE. */
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, m, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, m, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    return true;
}
3256
/*
 * Expand a 2-source predicate break (BRKA/BRKB/BRKN), selecting the
 * flag-setting helper when a->s is set.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting form: helper returns NZCV. */
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    return true;
}
3289
/* BRKPA / BRKPAS, via do_brk3. */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}
3294
/* BRKPB / BRKPBS, via do_brk3. */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}
3299
/* BRKA / BRKAS, merging form. */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}
3304
/* BRKB / BRKBS, merging form. */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}
3309
/* BRKA / BRKAS, zeroing form. */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}
3314
/* BRKB / BRKBS, zeroing form. */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}
3319
/* BRKN / BRKNS, via do_brk2. */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3324
9ee3a611
RH
3325/*
3326 *** SVE Predicate Count Group
3327 */
3328
/*
 * Count into val the active elements of predicate pn, gated by pg,
 * for element size esz.  Predicates of at most 8 bytes are handled
 * inline with an AND + mask + popcount; larger predicates go through
 * the gen_helper_sve_cntp out-of-line helper.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            /* Only AND in the governing predicate if it differs. */
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}
3367
3a7be554 3368static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
3369{
3370 if (sve_access_check(s)) {
3371 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3372 }
3373 return true;
3374}
3375
3a7be554 3376static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3377{
3378 if (sve_access_check(s)) {
3379 TCGv_i64 reg = cpu_reg(s, a->rd);
3380 TCGv_i64 val = tcg_temp_new_i64();
3381
3382 do_cntp(s, val, a->esz, a->pg, a->pg);
3383 if (a->d) {
3384 tcg_gen_sub_i64(reg, reg, val);
3385 } else {
3386 tcg_gen_add_i64(reg, reg, val);
3387 }
3388 tcg_temp_free_i64(val);
3389 }
3390 return true;
3391}
3392
3a7be554 3393static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3394{
3395 if (a->esz == 0) {
3396 return false;
3397 }
3398 if (sve_access_check(s)) {
3399 unsigned vsz = vec_full_reg_size(s);
3400 TCGv_i64 val = tcg_temp_new_i64();
3401 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3402
3403 do_cntp(s, val, a->esz, a->pg, a->pg);
3404 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3405 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3406 }
3407 return true;
3408}
3409
3a7be554 3410static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3411{
3412 if (sve_access_check(s)) {
3413 TCGv_i64 reg = cpu_reg(s, a->rd);
3414 TCGv_i64 val = tcg_temp_new_i64();
3415
3416 do_cntp(s, val, a->esz, a->pg, a->pg);
3417 do_sat_addsub_32(reg, val, a->u, a->d);
3418 }
3419 return true;
3420}
3421
3a7be554 3422static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3423{
3424 if (sve_access_check(s)) {
3425 TCGv_i64 reg = cpu_reg(s, a->rd);
3426 TCGv_i64 val = tcg_temp_new_i64();
3427
3428 do_cntp(s, val, a->esz, a->pg, a->pg);
3429 do_sat_addsub_64(reg, val, a->u, a->d);
3430 }
3431 return true;
3432}
3433
3a7be554 3434static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3435{
3436 if (a->esz == 0) {
3437 return false;
3438 }
3439 if (sve_access_check(s)) {
3440 TCGv_i64 val = tcg_temp_new_i64();
3441 do_cntp(s, val, a->esz, a->pg, a->pg);
3442 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3443 }
3444 return true;
3445}
3446
caf1cefc
RH
3447/*
3448 *** SVE Integer Compare Scalars Group
3449 */
3450
/*
 * CTERMEQ/CTERMNE: compare and terminate loop.  Sets NF from the
 * comparison result and derives VF so that subsequent conditional
 * branches see the architected termination condition.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3475
/*
 * WHILE* (LT/LE/LO/LS and the SVE2 GT/GE/HI/HS forms): construct a
 * predicate from a scalar loop-bound comparison.  The strategy is to
 * reduce every condition to "how many iterations are true", bound that
 * count by the vector length, and hand the count to a helper that
 * materializes the predicate and returns NZCV.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit operands: extend per signedness. */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length.  This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
3586
14f6dad1
RH
3587static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3588{
3589 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3590 TCGv_i32 t2;
14f6dad1
RH
3591 TCGv_ptr ptr;
3592 unsigned vsz = vec_full_reg_size(s);
3593 unsigned desc = 0;
3594
3595 if (!dc_isar_feature(aa64_sve2, s)) {
3596 return false;
3597 }
3598 if (!sve_access_check(s)) {
3599 return true;
3600 }
3601
3602 op0 = read_cpu_reg(s, a->rn, 1);
3603 op1 = read_cpu_reg(s, a->rm, 1);
3604
4481bbf2 3605 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3606 diff = tcg_temp_new_i64();
3607
3608 if (a->rw) {
3609 /* WHILERW */
3610 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3611 t1 = tcg_temp_new_i64();
3612 tcg_gen_sub_i64(diff, op0, op1);
3613 tcg_gen_sub_i64(t1, op1, op0);
3614 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3615 tcg_temp_free_i64(t1);
3616 /* Round down to a multiple of ESIZE. */
3617 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3618 /* If op1 == op0, diff == 0, and the condition is always true. */
3619 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3620 } else {
3621 /* WHILEWR */
3622 tcg_gen_sub_i64(diff, op1, op0);
3623 /* Round down to a multiple of ESIZE. */
3624 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3625 /* If op0 >= op1, diff <= 0, the condition is always true. */
3626 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3627 }
3628
3629 /* Bound to the maximum. */
3630 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3631
3632 /* Since we're bounded, pass as a 32-bit type. */
3633 t2 = tcg_temp_new_i32();
3634 tcg_gen_extrl_i64_i32(t2, diff);
3635 tcg_temp_free_i64(diff);
3636
3637 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3638 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3639
3640 ptr = tcg_temp_new_ptr();
3641 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3642
4481bbf2 3643 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3644 do_pred_flags(t2);
3645
3646 tcg_temp_free_ptr(ptr);
3647 tcg_temp_free_i32(t2);
14f6dad1
RH
3648 return true;
3649}
3650
ed491961
RH
3651/*
3652 *** SVE Integer Wide Immediate - Unpredicated Group
3653 */
3654
3a7be554 3655static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3656{
3657 if (a->esz == 0) {
3658 return false;
3659 }
3660 if (sve_access_check(s)) {
3661 unsigned vsz = vec_full_reg_size(s);
3662 int dofs = vec_full_reg_offset(s, a->rd);
3663 uint64_t imm;
3664
3665 /* Decode the VFP immediate. */
3666 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3667 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3668 }
3669 return true;
3670}
3671
3a7be554 3672static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3673{
3a7be554 3674 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
ed491961
RH
3675 return false;
3676 }
3677 if (sve_access_check(s)) {
3678 unsigned vsz = vec_full_reg_size(s);
3679 int dofs = vec_full_reg_offset(s, a->rd);
3680
8711e71f 3681 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3682 }
3683 return true;
3684}
3685
3a7be554 3686static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3687{
3a7be554 3688 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3689 return false;
3690 }
3691 if (sve_access_check(s)) {
3692 unsigned vsz = vec_full_reg_size(s);
3693 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3694 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3695 }
3696 return true;
3697}
3698
/* SUB (immediate): implemented as ADD of the negated immediate. */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3704
/*
 * SUBR (immediate): reversed subtract, Zd = imm - Zn per element.
 * Expanded with tcg_gen_gvec_2s and scalar_first so the immediate is
 * the minuend.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* Insn bit 13 (presumably the immediate-shift field) is
     * invalid for byte elements -- TODO confirm against decode. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}
3747
3a7be554 3748static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3749{
3750 if (sve_access_check(s)) {
3751 unsigned vsz = vec_full_reg_size(s);
3752 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3753 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3754 }
3755 return true;
3756}
3757
3a7be554 3758static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3759{
3a7be554 3760 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3761 return false;
3762 }
3763 if (sve_access_check(s)) {
138a1f7b
RH
3764 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3765 tcg_constant_i64(a->imm), u, d);
6e6a157d
RH
3766 }
3767 return true;
3768}
3769
/* SQADD (immediate): signed saturating add. */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}
3774
/* UQADD (immediate): unsigned saturating add. */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}
3779
/* SQSUB (immediate): signed saturating subtract. */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}
3784
/* UQSUB (immediate): unsigned saturating subtract. */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
3789
3790static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3791{
3792 if (sve_access_check(s)) {
3793 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3794 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3795 vec_full_reg_offset(s, a->rn),
138a1f7b 3796 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
6e6a157d
RH
3797 }
3798 return true;
3799}
3800
/* Min/max with immediate, dispatching on element size via do_zzi_ool. */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3817
5f425b92
RH
/* SDOT/UDOT (vectors): helper table indexed by [a->u][a->sz]. */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
d730ecaa 3824
814d4c52
RH
3825/*
3826 * SVE Multiply - Indexed
3827 */
3828
f3500a25
RH
/* Indexed dot products; SUDOT/USDOT additionally require FEAT_I8MM. */
TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)
16fcfdc7 3842
/* Indexed multiply: the element index travels in the gvec data field. */
#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX
3860
/*
 * Indexed multiply long, top/bottom: the TOP flag is packed into the
 * low bit of the data field, the index into the remaining bits.
 */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
3881
8a02aac7
RH
3882static bool do_sve2_zzzz_data(DisasContext *s, int rd, int rn, int rm, int ra,
3883 int data, gen_helper_gvec_4 *fn)
3884{
3885 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
3886 return false;
3887 }
3888 if (sve_access_check(s)) {
3889 unsigned vsz = vec_full_reg_size(s);
3890 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
3891 vec_full_reg_offset(s, rn),
3892 vec_full_reg_offset(s, rm),
3893 vec_full_reg_offset(s, ra),
3894 vsz, vsz, data, fn);
3895 }
3896 return true;
3897}
3898
/* Indexed multiply-accumulate: ra is the addend, index in data field. */
#define DO_SVE2_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a) \
    { return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, a->index, FUNC); }

DO_SVE2_RRXR(trans_MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(trans_MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(trans_MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(trans_MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(trans_MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(trans_MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
3920
c5c455d7
RH
/*
 * Indexed multiply-accumulate long, top/bottom.  The accumulator is
 * rd itself (passed as ra); TOP occupies the low bit of data.
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a) \
    { \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->rd, \
                                 (a->index << 1) | TOP, FUNC); \
    }

DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
3959
3b787ed8
RH
/*
 * Indexed complex multiply-accumulate: the rotation occupies the low
 * two bits of the data field, the index the remaining bits.
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    static bool trans_##NAME(DisasContext *s, arg_##NAME *a)       \
    {                                                              \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra,    \
                                 (a->index << 2) | a->rot, FUNC);  \
    }

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
3977
ca40a6e6
RH
3978/*
3979 *** SVE Floating Point Multiply-Add Indexed Group
3980 */
3981
/*
 * FMLA/FMLS (indexed): sub selects the fused-subtract form via the
 * low bit of the gvec data field.  NOTE(review): fns is indexed with
 * a->esz - 1 with no esz==0 check; presumably the decode never
 * produces byte elements here -- confirm against sve.decode.
 */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4003
0a82d963
RH
/* FMLA (indexed). */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, false);
}
4008
/* FMLS (indexed). */
static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, true);
}
4013
ca40a6e6
RH
4014/*
4015 *** SVE Floating Point Multiply Indexed Group
4016 */
4017
/*
 * FMUL (indexed).  NOTE(review): fns indexed with a->esz - 1 and no
 * esz==0 check; presumably the decode excludes byte elements --
 * confirm against sve.decode.
 */
static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4037
23fbe79f
RH
4038/*
4039 *** SVE Floating Point Fast Reduction Group
4040 */
4041
typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/*
 * Expand an SVE FP horizontal reduction, writing the scalar result to
 * Vd via write_fp_dreg.  The descriptor data field carries pow2ceil of
 * the vector size (presumably sizing the helper's reduction tree --
 * see the corresponding helpers).
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
4070
/* FP reductions, h/s/d only (esz == 0 is invalid). */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)             \
{                                                                     \
    static gen_helper_fp_reduce * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                    \
        gen_helper_sve_##name##_s,                                    \
        gen_helper_sve_##name##_d,                                    \
    };                                                                \
    if (a->esz == 0) {                                                \
        return false;                                                 \
    }                                                                 \
    if (sve_access_check(s)) {                                        \
        do_reduce(s, a, fns[a->esz - 1]);                             \
    }                                                                 \
    return true;                                                      \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
4093
3887c038
RH
4094/*
4095 *** SVE Floating Point Unary Operations - Unpredicated Group
4096 */
4097
4098static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
4099{
4100 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4101 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
4102
4103 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4104 vec_full_reg_offset(s, a->rn),
4105 status, vsz, vsz, 0, fn);
4106 tcg_temp_free_ptr(status);
4107}
4108
3a7be554 4109static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
4110{
4111 static gen_helper_gvec_2_ptr * const fns[3] = {
4112 gen_helper_gvec_frecpe_h,
4113 gen_helper_gvec_frecpe_s,
4114 gen_helper_gvec_frecpe_d,
4115 };
4116 if (a->esz == 0) {
4117 return false;
4118 }
4119 if (sve_access_check(s)) {
4120 do_zz_fp(s, a, fns[a->esz - 1]);
4121 }
4122 return true;
4123}
4124
3a7be554 4125static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
4126{
4127 static gen_helper_gvec_2_ptr * const fns[3] = {
4128 gen_helper_gvec_frsqrte_h,
4129 gen_helper_gvec_frsqrte_s,
4130 gen_helper_gvec_frsqrte_d,
4131 };
4132 if (a->esz == 0) {
4133 return false;
4134 }
4135 if (sve_access_check(s)) {
4136 do_zz_fp(s, a, fns[a->esz - 1]);
4137 }
4138 return true;
4139}
4140
4d2e2a03
RH
4141/*
4142 *** SVE Floating Point Compare with Zero Group
4143 */
4144
/*
 * Expand an FP compare-with-zero, writing a predicate result gated
 * by Pg.
 */
static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}
4157
/* FP compare with zero, h/s/d only (esz == 0 is invalid). */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
4182#undef DO_PPZ
4183
67fcd9ad
RH
4184/*
4185 *** SVE floating-point trig multiply-add coefficient
4186 */
4187
/*
 * FTMAD: trig multiply-add coefficient.  The 3-bit immediate selects
 * the coefficient table entry and is passed through simd_data.
 */
static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_ftmad_h,
        gen_helper_sve_ftmad_s,
        gen_helper_sve_ftmad_d,
    };

    if (a->esz == 0) {
        /* No byte-element form. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4210
7f9ddf64
RH
4211/*
4212 *** SVE Floating Point Accumulating Reduction Group
4213 */
4214
/*
 * FADDA: strictly-ordered floating-point accumulating reduction.
 * The scalar accumulator comes from element 0 of RN, the vector operand
 * is RM under predicate PG, and the scalar result is written to RD.
 * This cannot use the generic gvec expansion because the helper both
 * consumes and produces an i64 scalar value.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {
        /* No byte-element form. */
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Load the initial accumulator from element 0 of RN. */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    /* In-place accumulate: t_val is both input and output. */
    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
4254
29b80469
RH
4255/*
4256 *** SVE Floating Point Arithmetic - Unpredicated Group
4257 */
4258
4259static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
4260 gen_helper_gvec_3_ptr *fn)
4261{
4262 if (fn == NULL) {
4263 return false;
4264 }
4265 if (sve_access_check(s)) {
4266 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4267 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
4268 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4269 vec_full_reg_offset(s, a->rn),
4270 vec_full_reg_offset(s, a->rm),
4271 status, vsz, vsz, 0, fn);
4272 tcg_temp_free_ptr(status);
4273 }
4274 return true;
4275}
4276
4277
/*
 * Instantiate one unpredicated three-vector FP translator.
 * fns[] is indexed directly by esz; entry 0 (bytes) is NULL and is
 * rejected inside do_zzz_fp.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
4296
ec3b87c2
RH
4297/*
4298 *** SVE Floating Point Arithmetic - Predicated Group
4299 */
4300
/*
 * Expand a predicated three-vector FP operation RD = RN op RM under
 * governing predicate PG.  A NULL helper denotes an unsupported
 * element size.
 */
static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                       gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4319
/*
 * Instantiate one predicated three-vector FP translator.
 * fns[] is indexed directly by esz; entry 0 (bytes) is NULL and is
 * rejected inside do_zpzz_fp.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)        \
{                                                                 \
    static gen_helper_gvec_4_ptr * const fns[4] = {               \
        NULL, gen_helper_sve_##name##_h,                          \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d      \
    };                                                            \
    return do_zpzz_fp(s, a, fns[a->esz]);                         \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
8092c6a3 4343
cc48affe
RH
/* Helper signature: (zd, zn, pg, scalar, fpstatus, desc). */
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/*
 * Expand a predicated vector-by-scalar FP operation, calling the
 * out-of-line helper directly with register pointers.  The i64 scalar
 * cannot go through the generic gvec expanders.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
4370
4371static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4372 gen_helper_sve_fp2scalar *fn)
4373{
138a1f7b
RH
4374 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4375 tcg_constant_i64(imm), fn);
cc48affe
RH
4376}
4377
/*
 * Instantiate one FP-immediate translator.  The 1-bit immediate in the
 * encoding selects between the two permitted constants (const0/const1),
 * given here per element size in the val[][] table.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                        \
        gen_helper_sve_##name##_s,                                        \
        gen_helper_sve_##name##_d                                         \
    };                                                                    \
    static uint64_t const val[3][2] = {                                   \
        { float16_##const0, float16_##const1 },                           \
        { float32_##const0, float32_##const1 },                           \
        { float64_##const0, float64_##const1 },                           \
    };                                                                    \
    if (a->esz == 0) {                                                    \
        return false;                                                     \
    }                                                                     \
    if (sve_access_check(s)) {                                            \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
    }                                                                     \
    return true;                                                          \
}

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
4410
abfdefd5
RH
4411static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4412 gen_helper_gvec_4_ptr *fn)
4413{
4414 if (fn == NULL) {
4415 return false;
4416 }
4417 if (sve_access_check(s)) {
4418 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4419 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4420 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4421 vec_full_reg_offset(s, a->rn),
4422 vec_full_reg_offset(s, a->rm),
4423 pred_full_reg_offset(s, a->pg),
4424 status, vsz, vsz, 0, fn);
4425 tcg_temp_free_ptr(status);
4426 }
4427 return true;
4428}
4429
/*
 * Instantiate one predicated FP compare translator.  fns[] is indexed
 * directly by esz; entry 0 (bytes) is NULL and rejected in do_fp_cmp.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)   \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_fp_cmp(s, a, fns[a->esz]);                            \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
4449
/*
 * FCADD: complex add with rotate.  The rotation selector is passed
 * through simd_data to the helper.
 */
static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_sve_fcadd_h,
        gen_helper_sve_fcadd_s,
        gen_helper_sve_fcadd_d
    };

    if (a->esz == 0) {
        /* No byte-element form. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4473
08975da9
RH
/*
 * Expand a predicated FP multiply-add: RD = fn(RN, RM, RA) under PG.
 * Callers pass a fns[] table with a NULL byte-element entry, but the
 * esz == 0 check here rejects that case before indexing.
 */
static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
                    gen_helper_gvec_5_ptr *fn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4493
/* Instantiate one predicated FP multiply-add translator. */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)       \
{                                                                 \
    static gen_helper_gvec_5_ptr * const fns[4] = {               \
        NULL, gen_helper_sve_##name##_h,                          \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d      \
    };                                                            \
    return do_fmla(s, a, fns[a->esz]);                            \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4510
/*
 * FCMLA (vectors): predicated complex multiply-add with rotate;
 * the rotation selector goes through simd_data.
 */
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    static gen_helper_gvec_5_ptr * const fns[4] = {
        NULL,
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    if (a->esz == 0) {
        /* No byte-element form. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4536
/*
 * FCMLA (indexed): complex multiply-add with an indexed element of RM.
 * Only H and S element sizes exist; the decoder guarantees this, hence
 * the debug asserts.  Index and rotation are packed into simd_data as
 * (index * 4 + rot).
 */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_4_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz,
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4560
8092c6a3
RH
4561/*
4562 *** SVE Floating Point Unary Operations Predicated Group
4563 */
4564
/*
 * Expand a predicated unary FP operation RD = fn(RN) under PG.
 * is_fp16 selects the FP16-specific status flags, which the caller
 * must choose per the *operand* size of the conversion.
 */
static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4579
/*
 * FCVT between FP precisions (suffix = source/destination size).
 * These all use the non-FP16 status flags.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

/* BFCVT: single to bfloat16; gated on the SVE BF16 feature. */
static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}
4617
/*
 * FCVTZS/FCVTZU: FP to signed/unsigned integer, round toward zero.
 * Suffix is source/destination size.  Forms with a half-precision
 * source pass is_fp16 = true so the FP16 status flags are used.
 */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
4687
cda3c753
RH
/* Round-to-integral helpers, shared by FRINTI and the FRINT[NPMZA] forms. */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round to integral using the current FPCR rounding mode. */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}

/* FRINTX: round to integral, raising Inexact, current rounding mode. */
static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frintx_h,
        gen_helper_sve_frintx_s,
        gen_helper_sve_frintx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
4715
95365277
SL
/*
 * Expand a round-to-integral with an explicit rounding mode.
 * gen_helper_set_rmode swaps the requested mode into the FP status and
 * returns the previous mode in tmode, so the second call restores the
 * original rounding mode after the operation.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);

        /* Restore the previous rounding mode saved in tmode. */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4737
/* FRINTN: round to integral, ties to even. */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
}

/* FRINTP: round to integral, toward +infinity. */
static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
}

/* FRINTM: round to integral, toward -infinity. */
static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
}

/* FRINTZ: round to integral, toward zero. */
static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
}

/* FRINTA: round to integral, ties away from zero. */
static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
}
4777
/* FRECPX: floating-point reciprocal exponent. */
static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frecpx_h,
        gen_helper_sve_frecpx_s,
        gen_helper_sve_frecpx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}

/* FSQRT: predicated floating-point square root. */
static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_fsqrt_h,
        gen_helper_sve_fsqrt_s,
        gen_helper_sve_fsqrt_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
4803
/*
 * SCVTF/UCVTF: signed/unsigned integer to FP conversion.
 * Suffix is source/destination size.  Forms producing half precision
 * pass is_fp16 = true so the FP16 status flags are used.
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
4873
d1822297
RH
4874/*
4875 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4876 */
4877
4878/* Subroutine loading a vector register at VOFS of LEN bytes.
4879 * The load should begin at the address Rn + IMM.
4880 */
4881
/*
 * Load LEN bytes from [Rn + IMM] into the register file at byte offset
 * VOFS, as an unpredicated byte stream (little-endian 8-byte chunks).
 * Used for both LDR (vector) and LDR (predicate); predicate lengths are
 * a multiple of 2 only, handled by the len_remain tail.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small transfer: fully unrolled sequence of 8-byte loads. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large transfer: emit a TCG-level loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two tail: one load of the exact size. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: 4-byte load then 2-byte load, merged. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
4968
/*
 * Similarly for stores: write LEN bytes from the register file at byte
 * offset VOFS to [Rn + IMM], as an unpredicated little-endian stream.
 */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities. There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small transfer: fully unrolled sequence of 8-byte stores. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large transfer: emit a TCG-level loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two tail: one store of the exact size. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: 4-byte store then high 2 bytes. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
5053
/* LDR (vector): load a full Z register; imm is scaled by register size. */
static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* LDR (predicate): load a full P register; imm scaled by predicate size. */
static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* STR (vector): store a full Z register; imm scaled by register size. */
static bool trans_STR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* STR (predicate): store a full P register; imm scaled by predicate size. */
static bool trans_STR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}
5093
c4e7c493
RH
5094/*
5095 *** SVE Memory - Contiguous Load Group
5096 */
5097
/* The memory mode of the dtype (indexed by the 4-bit dtype field). */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

/* Extract just the memory access size (log2) from the dtype's MemOp. */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)

/* The vector element size (log2) of dtype. */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
5115
/*
 * Expand a predicated contiguous load/store via an out-of-line helper.
 * zt is the first transfer register, pg the governing predicate, addr
 * the (dirty) base address.  mte_n is the number of registers covered
 * by a single MTE check; is_write selects the MTE access direction.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        /* Pack the MTE check parameters above SVE_MTEDESC_SHIFT. */
        int msz = dtype_msz(dtype);

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        /* No MTE: strip the tag from the address at translate time. */
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_pg);
}
5150
c182c6db
RH
/*
 * Helper table for contiguous predicated loads.
 * Indexed by [mte][be][dtype][nreg]:
 *   mte   - MTE active (1) or not (0)
 *   be    - big-endian data (1) or little-endian (0)
 *   dtype - the 4-bit dtype field of the encoding
 *   nreg  - number of registers minus 1 (LD1..LD4)
 * NULL entries are dtype/nreg combinations with no instruction encoding.
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
5269
c4e7c493
RH
5270static void do_ld_zpa(DisasContext *s, int zt, int pg,
5271 TCGv_i64 addr, int dtype, int nreg)
5272{
206adacf 5273 gen_helper_gvec_mem *fn
c182c6db 5274 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 5275
206adacf
RH
5276 /*
5277 * While there are holes in the table, they are not
c4e7c493
RH
5278 * accessible via the instruction encoding.
5279 */
5280 assert(fn != NULL);
206adacf 5281 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
5282}
5283
/*
 * LD1..LD4 (scalar plus scalar): contiguous load from
 * Xn|SP + (Xm << msz).  rm == 31 (XZR) is not a valid encoding
 * for this form, so reject it as unallocated.
 */
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        /* addr = Xn|SP + (Xm << msz) */
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
5297
3a7be554 5298static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
5299{
5300 if (sve_access_check(s)) {
5301 int vsz = vec_full_reg_size(s);
5302 int elements = vsz >> dtype_esz[a->dtype];
5303 TCGv_i64 addr = new_tmp_a64(s);
5304
5305 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5306 (a->imm * elements * (a->nreg + 1))
5307 << dtype_msz(a->dtype));
5308 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5309 }
5310 return true;
5311}
e2654d75 5312
/*
 * LDFF1 (scalar plus scalar): first-fault contiguous load.
 * Helper table indexed by [mte][be][dtype]; first-fault loads exist
 * only in single-register form, hence no nreg dimension.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        /* addr = Xn|SP + (Xm << msz) */
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
5410
/*
 * LDNF1 (scalar plus immediate): non-fault contiguous load.
 * Helper table indexed by [mte][be][dtype], as for LDFF1.
 * (Comment fix: the second half of this table is the MTE-active set;
 * it was previously mislabeled "mte inactive".)
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        /* The immediate is scaled by one full vector's worth of elements. */
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
1a039c7e 5511
/*
 * LD1RQ helper: load one 16-byte quadword predicated by the low bits
 * of Pg, then replicate it across the whole vector register Zt.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers. */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /* On big-endian hosts the low 16 predicate bits sit at byte 6. */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        /* Stage the truncated predicate in the scratch slot. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* nreg == 0 slot of ldr_fns is the LD1 helper for this dtype. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    /* Run the helper with an artificial 16-byte vector length. */
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    tcg_temp_free_ptr(t_pg);

    /* Replicate that first quadword. */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}
5553
3a7be554 5554static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5555{
5556 if (a->rm == 31) {
5557 return false;
5558 }
5559 if (sve_access_check(s)) {
5560 int msz = dtype_msz(a->dtype);
5561 TCGv_i64 addr = new_tmp_a64(s);
5562 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5563 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5564 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5565 }
5566 return true;
5567}
5568
3a7be554 5569static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5570{
5571 if (sve_access_check(s)) {
5572 TCGv_i64 addr = new_tmp_a64(s);
5573 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5574 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5575 }
5576 return true;
5577}
5578
12c563f6
RH
/*
 * LD1RO helper: load one 32-byte octaword predicated by the low bits
 * of Pg, then replicate it in 32-byte units across Zt; any final
 * partial unit is zeroed.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers. */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /* On big-endian hosts the low 32 predicate bits sit at byte 4. */
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        /* Stage the truncated predicate in the scratch slot. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* nreg == 0 slot of ldr_fns is the LD1 helper for this dtype. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    /* Run the helper with an artificial 32-byte vector length. */
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}
5641
5642static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5643{
5644 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5645 return false;
5646 }
5647 if (a->rm == 31) {
5648 return false;
5649 }
5650 if (sve_access_check(s)) {
5651 TCGv_i64 addr = new_tmp_a64(s);
5652 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5653 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5654 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5655 }
5656 return true;
5657}
5658
5659static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5660{
5661 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5662 return false;
5663 }
5664 if (sve_access_check(s)) {
5665 TCGv_i64 addr = new_tmp_a64(s);
5666 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5667 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5668 }
5669 return true;
5670}
5671
/*
 * LD1R (scalar plus immediate): load one element and broadcast it to
 * every active element of Zd; inactive elements are zeroed.
 * If no predicate bit is set, the load is skipped entirely (branch
 * over it), leaving only the zeroing of Zd via do_movz_zpz.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Wide predicates: branch over if no element is active. */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
5723
1a039c7e
RH
/*
 * Emit a predicated contiguous store ST1..ST4.
 * fn_single is indexed by [mte][be][msz][esz] (ST1 allows msz < esz,
 * i.e. truncating stores); fn_multiple by [mte][be][nreg-1][msz]
 * (ST2..ST4 require msz == esz).  NULL entries are unencodable.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
5847
3a7be554 5848static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5849{
5850 if (a->rm == 31 || a->msz > a->esz) {
5851 return false;
5852 }
5853 if (sve_access_check(s)) {
5854 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5855 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5856 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5857 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5858 }
5859 return true;
5860}
5861
3a7be554 5862static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5863{
5864 if (a->msz > a->esz) {
5865 return false;
5866 }
5867 if (sve_access_check(s)) {
5868 int vsz = vec_full_reg_size(s);
5869 int elements = vsz >> a->esz;
5870 TCGv_i64 addr = new_tmp_a64(s);
5871
5872 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5873 (a->imm * elements * (a->nreg + 1)) << a->msz);
5874 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5875 }
5876 return true;
5877}
f6dbf62a
RH
5878
5879/*
5880 *** SVE gather loads / scatter stores
5881 */
5882
/*
 * Emit a gather load / scatter store: helper FN operates on Zt,
 * governed by Pg, with per-element offsets from Zm scaled by SCALE
 * and added to the scalar base.  MTE descriptor fields, when active,
 * are packed above SVE_MTEDESC_SHIFT in the simd_desc data field,
 * alongside the scale.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    int desc = 0;

    if (s->mte_active[0]) {
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        /* Each gather/scatter element is a single access of 1 << msz bytes. */
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    }
    desc = simd_desc(vsz, vsz, desc | scale);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
}
5912
d28d12f0
RH
/*
 * Helper table for 32-bit gather loads.
 * Indexed by [mte][be][ff][xs][u][msz]:
 *   mte - MTE active, be - big-endian data, ff - first-fault form,
 *   xs  - offsets are signed (1) vs unsigned (0) 32-bit,
 *   u   - zero-extending (1) vs sign-extending (0) load,
 *   msz - log2 memory access size.
 * NULL entries (signed word loads into word elements) have no encoding.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
6029
/*
 * Gather-load helpers for 64-bit vector elements, indexed by
 * [mte][be][ff][xs][u][msz]:
 *   mte - MTE checks active, be - big-endian data,
 *   ff  - first-fault form, xs - index extension (0=zsu, 1=zss),
 *   u   - unsigned (zero-extending) load, msz - memory element size.
 * The msz==MO_64 signed slots are NULL: sign-extending a doubleword
 * into a doubleword is just the plain lddd load.
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
6240
3a7be554 6241static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
6242{
6243 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6244 bool be = s->be_data == MO_BE;
6245 bool mte = s->mte_active[0];
673e9fa6
RH
6246
6247 if (!sve_access_check(s)) {
6248 return true;
6249 }
6250
6251 switch (a->esz) {
6252 case MO_32:
d28d12f0 6253 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6254 break;
6255 case MO_64:
d28d12f0 6256 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6257 break;
6258 }
6259 assert(fn != NULL);
6260
6261 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6262 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
6263 return true;
6264}
6265
3a7be554 6266static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
6267{
6268 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6269 bool be = s->be_data == MO_BE;
6270 bool mte = s->mte_active[0];
673e9fa6
RH
6271
6272 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
6273 return false;
6274 }
6275 if (!sve_access_check(s)) {
6276 return true;
6277 }
6278
6279 switch (a->esz) {
6280 case MO_32:
d28d12f0 6281 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
6282 break;
6283 case MO_64:
d28d12f0 6284 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
6285 break;
6286 }
6287 assert(fn != NULL);
6288
6289 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
6290 * by loading the immediate into the scalar parameter.
6291 */
2ccdf94f
RH
6292 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6293 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
6294 return true;
6295}
6296
cf327449
SL
6297static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
6298{
b17ab470
RH
6299 gen_helper_gvec_mem_scatter *fn = NULL;
6300 bool be = s->be_data == MO_BE;
6301 bool mte = s->mte_active[0];
6302
6303 if (a->esz < a->msz + !a->u) {
6304 return false;
6305 }
cf327449
SL
6306 if (!dc_isar_feature(aa64_sve2, s)) {
6307 return false;
6308 }
b17ab470
RH
6309 if (!sve_access_check(s)) {
6310 return true;
6311 }
6312
6313 switch (a->esz) {
6314 case MO_32:
6315 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
6316 break;
6317 case MO_64:
6318 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
6319 break;
6320 }
6321 assert(fn != NULL);
6322
6323 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6324 cpu_reg(s, a->rm), a->msz, false, fn);
6325 return true;
cf327449
SL
6326}
6327
d28d12f0
RH
/*
 * Scatter-store helpers for 32-bit vector elements.
 * Indexed by [mte][be][xs][msz]: MTE active, big-endian,
 * index extension (0=zsu, 1=zss), and memory element size.
 * Byte stores have no endian variants.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
6361
/*
 * Scatter-store helpers for 64-bit vector elements,
 * indexed by [mte][be][xs][msz].
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */   /* was mislabeled "MTE Inactive": these are the _mte helpers */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
6419
3a7be554 6420static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 6421{
f6dbf62a 6422 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
6423 bool be = s->be_data == MO_BE;
6424 bool mte = s->mte_active[0];
f6dbf62a
RH
6425
6426 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6427 return false;
6428 }
6429 if (!sve_access_check(s)) {
6430 return true;
6431 }
6432 switch (a->esz) {
6433 case MO_32:
d28d12f0 6434 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
6435 break;
6436 case MO_64:
d28d12f0 6437 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
6438 break;
6439 default:
6440 g_assert_not_reached();
6441 }
6442 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6443 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
6444 return true;
6445}
dec6cf6b 6446
3a7be554 6447static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
6448{
6449 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6450 bool be = s->be_data == MO_BE;
6451 bool mte = s->mte_active[0];
408ecde9
RH
6452
6453 if (a->esz < a->msz) {
6454 return false;
6455 }
6456 if (!sve_access_check(s)) {
6457 return true;
6458 }
6459
6460 switch (a->esz) {
6461 case MO_32:
d28d12f0 6462 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
6463 break;
6464 case MO_64:
d28d12f0 6465 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
6466 break;
6467 }
6468 assert(fn != NULL);
6469
6470 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6471 * by loading the immediate into the scalar parameter.
6472 */
2ccdf94f
RH
6473 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6474 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
6475 return true;
6476}
6477
6ebca45f
SL
6478static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
6479{
b17ab470
RH
6480 gen_helper_gvec_mem_scatter *fn;
6481 bool be = s->be_data == MO_BE;
6482 bool mte = s->mte_active[0];
6483
6484 if (a->esz < a->msz) {
6485 return false;
6486 }
6ebca45f
SL
6487 if (!dc_isar_feature(aa64_sve2, s)) {
6488 return false;
6489 }
b17ab470
RH
6490 if (!sve_access_check(s)) {
6491 return true;
6492 }
6493
6494 switch (a->esz) {
6495 case MO_32:
6496 fn = scatter_store_fn32[mte][be][0][a->msz];
6497 break;
6498 case MO_64:
6499 fn = scatter_store_fn64[mte][be][2][a->msz];
6500 break;
6501 default:
6502 g_assert_not_reached();
6503 }
6504
6505 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6506 cpu_reg(s, a->rm), a->msz, true, fn);
6507 return true;
6ebca45f
SL
6508}
6509
dec6cf6b
RH
/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /*
     * Prefetch is a nop within QEMU; we still perform the SVE access
     * check so that a disabled SVE unit traps as architected.
     */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    /* rm == 31 is a reserved encoding for this form. */
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}
a2103582
RH
6530
/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    /* Unpredicated movprfx: a plain vector move. */
    return do_mov_z(s, a->rd, a->rn);
}

static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
{
    /* Merging movprfx: select rn where predicated, keep rd elsewhere. */
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
    }
    return true;
}

static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    /* Zeroing movprfx: copy rn where predicated, zero elsewhere. */
    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
5dad1ba5
RH
6562
6563/*
6564 * SVE2 Integer Multiply - Unpredicated
6565 */
6566
6567static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
6568{
6569 if (!dc_isar_feature(aa64_sve2, s)) {
6570 return false;
6571 }
6572 if (sve_access_check(s)) {
6573 gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
6574 }
6575 return true;
6576}
6577
bd394cf5
RH
/* Multiply returning high half, one helper per element size. */
static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

/* Polynomial multiply uses a single byte-sized helper. */
TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

/* Signed saturating (rounding) doubling multiply high half. */
static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6608
d4b1e59d
RH
6609/*
6610 * SVE2 Integer - Predicated
6611 */
6612
6613static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
6614 gen_helper_gvec_4 *fn)
6615{
6616 if (!dc_isar_feature(aa64_sve2, s)) {
6617 return false;
6618 }
6619 return do_zpzz_ool(s, a, fn);
6620}
6621
6622static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6623{
6624 static gen_helper_gvec_4 * const fns[3] = {
6625 gen_helper_sve2_sadalp_zpzz_h,
6626 gen_helper_sve2_sadalp_zpzz_s,
6627 gen_helper_sve2_sadalp_zpzz_d,
6628 };
6629 if (a->esz == 0) {
6630 return false;
6631 }
6632 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6633}
6634
6635static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6636{
6637 static gen_helper_gvec_4 * const fns[3] = {
6638 gen_helper_sve2_uadalp_zpzz_h,
6639 gen_helper_sve2_uadalp_zpzz_s,
6640 gen_helper_sve2_uadalp_zpzz_d,
6641 };
6642 if (a->esz == 0) {
6643 return false;
6644 }
6645 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6646}
db366da8
RH
6647
6648/*
6649 * SVE2 integer unary operations (predicated)
6650 */
6651
6652static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a,
6653 gen_helper_gvec_3 *fn)
6654{
6655 if (!dc_isar_feature(aa64_sve2, s)) {
6656 return false;
6657 }
6658 return do_zpz_ool(s, a, fn);
6659}
6660
6661static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a)
6662{
6663 if (a->esz != 2) {
6664 return false;
6665 }
6666 return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s);
6667}
6668
6669static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a)
6670{
6671 if (a->esz != 2) {
6672 return false;
6673 }
6674 return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s);
6675}
6676
6677static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a)
6678{
6679 static gen_helper_gvec_3 * const fns[4] = {
6680 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
6681 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
6682 };
6683 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6684}
6685
6686static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a)
6687{
6688 static gen_helper_gvec_3 * const fns[4] = {
6689 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
6690 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
6691 };
6692 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6693}
45d9503d
RH
6694
/*
 * Expand an SVE2 predicated three-operand insn NAME through the
 * per-element-size out-of-line helpers gen_helper_sve2_<name>_zpzz_{b,h,s,d}.
 */
#define DO_SVE2_ZPZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4 * const fns[4] = { \
        gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
    }; \
    return do_sve2_zpzz_ool(s, a, fns[a->esz]); \
}

/* Saturating / rounding shifts by vector. */
DO_SVE2_ZPZZ(SQSHL, sqshl)
DO_SVE2_ZPZZ(SQRSHL, sqrshl)
DO_SVE2_ZPZZ(SRSHL, srshl)

DO_SVE2_ZPZZ(UQSHL, uqshl)
DO_SVE2_ZPZZ(UQRSHL, uqrshl)
DO_SVE2_ZPZZ(URSHL, urshl)

/* Halving add / subtract. */
DO_SVE2_ZPZZ(SHADD, shadd)
DO_SVE2_ZPZZ(SRHADD, srhadd)
DO_SVE2_ZPZZ(SHSUB, shsub)

DO_SVE2_ZPZZ(UHADD, uhadd)
DO_SVE2_ZPZZ(URHADD, urhadd)
DO_SVE2_ZPZZ(UHSUB, uhsub)

/* Pairwise arithmetic. */
DO_SVE2_ZPZZ(ADDP, addp)
DO_SVE2_ZPZZ(SMAXP, smaxp)
DO_SVE2_ZPZZ(UMAXP, umaxp)
DO_SVE2_ZPZZ(SMINP, sminp)
DO_SVE2_ZPZZ(UMINP, uminp)

/* Saturating add / subtract. */
DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
DO_SVE2_ZPZZ(SUQADD, suqadd)
DO_SVE2_ZPZZ(USQADD, usqadd)
0ce1dda8
RH
6733
6734/*
6735 * SVE2 Widening Integer Arithmetic
6736 */
6737
6738static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a,
6739 gen_helper_gvec_3 *fn, int data)
6740{
6741 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6742 return false;
6743 }
6744 if (sve_access_check(s)) {
6745 unsigned vsz = vec_full_reg_size(s);
6746 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
6747 vec_full_reg_offset(s, a->rn),
6748 vec_full_reg_offset(s, a->rm),
6749 vsz, vsz, data, fn);
6750 }
6751 return true;
6752}
6753
/*
 * Expand a widening insn NAME: SEL1/SEL2 choose the bottom (false)
 * or top (true) half of each source; they are packed into the helper
 * data word as (SEL2 << 1) | SEL1.  No byte-sized variant exists.
 */
#define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
{ \
    static gen_helper_gvec_3 * const fns[4] = { \
        NULL, gen_helper_sve2_##name##_h, \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
    }; \
    return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1); \
}

DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false)
DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false)
DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false)

DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false)
DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false)
DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false)

DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true)
DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true)
DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true)

DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true)
DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true)
DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true)

/* Mixed bottom/top forms. */
DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true)
DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true)
DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false)

DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false)
DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true)

DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false)
DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true)

DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false)
DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true)
6792
2df3ca55
RH
6793static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1)
6794{
6795 static gen_helper_gvec_3 * const fns[4] = {
6796 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6797 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6798 };
6799 return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1);
6800}
6801
/* EORBT: even destination elements from the bottom halves. */
static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a)
{
    return do_eor_tb(s, a, false);
}

/* EORTB: even destination elements from the top halves. */
static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a)
{
    return do_eor_tb(s, a, true);
}
6811
e3a56131
RH
6812static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6813{
6814 static gen_helper_gvec_3 * const fns[4] = {
6815 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6816 NULL, gen_helper_sve2_pmull_d,
6817 };
6818 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
6819 return false;
6820 }
6821 return do_sve2_zzw_ool(s, a, fns[a->esz], sel);
6822}
6823
/* PMULLB: multiply the bottom halves. */
static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a)
{
    return do_trans_pmull(s, a, false);
}

/* PMULLT: multiply the top halves. */
static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a)
{
    return do_trans_pmull(s, a, true);
}
6833
81fccf09
RH
/*
 * Expand a widening insn NAME whose first source is already wide:
 * SEL2 selects the bottom (false) or top (true) half of the second
 * source only.  No byte-sized variant exists.
 */
#define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
{ \
    static gen_helper_gvec_3 * const fns[4] = { \
        NULL, gen_helper_sve2_##name##_h, \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
    }; \
    return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2); \
}

DO_SVE2_ZZZ_WTB(SADDWB, saddw, false)
DO_SVE2_ZZZ_WTB(SADDWT, saddw, true)
DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false)
DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true)

DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false)
DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true)
DO_SVE2_ZZZ_WTB(USUBWB, usubw, false)
DO_SVE2_ZZZ_WTB(USUBWT, usubw, true)
4269fef1
RH
6853
/*
 * Vector expansion for SSHLLB/SSHLLT: sign-extend the even (top=0)
 * or odd (top=1) half-width elements of n and shift left by shl.
 * imm encodes (shl << 1) | top; halfbits is the narrow element width.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            /*
             * The high half is already in its final position;
             * just mask away the low half.
             */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Arithmetic shift down sign-extends the high half, then shift up. */
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        /* Move the low half to the top, then sign-extend back down by
           halfbits - shl for a net left shift of shl. */
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}
6875
/*
 * Integer expansion for USHLLB/USHLLT on one 64-bit lane group.
 * imm encodes (shl << 1) | top as for gen_sshll_vec.  Because the
 * extension is zero-extending, a single shift plus mask suffices.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    /* Mask of the narrow field, moved to its destination bit position,
       replicated across all wide elements. */
    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    /* For 'top' the source sits halfbits higher, so the net shift
       may be negative (a right shift). */
    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}
6896
/* Per-element-size adapters matching the GVecGen2i .fni8 signature. */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
6911
/*
 * Vector expansion for USHLLB/USHLLT: zero-extend the even (top=0)
 * or odd (top=1) half-width elements of n and shift left by shl.
 * imm encodes (shl << 1) | top.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            /* High half already in position: mask away the low half. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Logical shift down zero-extends the high half, then shift up. */
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            /* No shift: just mask to the low half. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Shift low half to the top, then zero-extend back down. */
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
6940
/*
 * Expand {S,U}SHLL{B,T}: widen and shift-left the bottom (sel=0) or
 * top (sel=1) halves.  'uns' selects the unsigned expanders.  The
 * immediate passed to the expanders packs (a->imm << 1) | sel, which
 * the gen_*shll_* functions above decode.
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    /* Indexed by [uns][esz - 1]; the .vece is the widened element size. */
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}
6992
/* Shift left long: (sel, uns) select bottom/top and signed/unsigned. */
static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}

static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}

static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}

static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
cb9c33b8
RH
7012
7013static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a)
7014{
7015 static gen_helper_gvec_3 * const fns[4] = {
7016 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
7017 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
7018 };
7019 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7020 return false;
7021 }
7022 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7023}
7024
7025static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a)
7026{
7027 static gen_helper_gvec_3 * const fns[4] = {
7028 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
7029 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
7030 };
7031 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7032 return false;
7033 }
7034 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7035}
7036
7037static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a)
7038{
7039 static gen_helper_gvec_3 * const fns[4] = {
7040 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
7041 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
7042 };
7043 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7044 return false;
7045 }
7046 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7047}
ed4a6387
RH
7048
7049static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot)
7050{
7051 static gen_helper_gvec_3 * const fns[2][4] = {
7052 { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
7053 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d },
7054 { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
7055 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d },
7056 };
7057 return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot);
7058}
7059
/* Complex add wrappers: (sq, rot) per the do_cadd arguments. */
static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, false);
}

static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, true);
}

static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, false);
}

static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, true);
}
38650638 7079
eeb4e84d
RH
/*
 * Absolute-difference-and-accumulate long; no byte form.
 * The trailing data argument selects bottom (0) or top (1) halves.
 */
static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL, gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL, gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
b8295dfb
RH
7093
/* Add/subtract with carry long, bottom (sel=0) or top (sel=1). */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}
7106
eeb4e84d
RH
/* Add with carry long: bottom/top wrappers around do_adcl. */
TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e
RH
7109
7110static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
7111{
7112 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
7113 return false;
7114 }
7115 if (sve_access_check(s)) {
7116 unsigned vsz = vec_full_reg_size(s);
7117 unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
7118 unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
7119 fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
7120 }
7121 return true;
7122}
7123
/* SSRA: signed shift right and accumulate. */
static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ssra);
}

/* USRA: unsigned shift right and accumulate. */
static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_usra);
}

/* SRSRA: signed rounding shift right and accumulate. */
static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_srsra);
}

/* URSRA: unsigned rounding shift right and accumulate. */
static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ursra);
}

/* SRI: shift right and insert. */
static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sri);
}

/* SLI: shift left and insert. */
static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sli);
}
289a1797
RH
7153
/* Expand an SVE2 three-operand (unpredicated) insn via a GVec expander. */
static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}

/* SABA: signed absolute difference and accumulate. */
static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_saba);
}

/* UABA: unsigned absolute difference and accumulate. */
static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
}
5ff2838d
RH
7174
/*
 * Common expansion for the XTN* narrowing-extract group.  ops[] is
 * indexed by a->esz, which here is the *source* (wide) element size;
 * MO_64 sources are handled via the MO_32 slot check below, so esz
 * must be in [0, MO_32] and the immediate must be zero.
 */
static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
                                   const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
        !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}
7190
/* Vector opcodes required by the gen_sqxtn[bt]_vec expansions below. */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTNB: clamp each wide element of n to the signed range of the
 * half-width element, then keep only the low half of each element.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;           /* bits in the narrow element */
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    /* Indexed by the wide (source) element size; .vece likewise. */
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7230
/*
 * SQXTNT: as SQXTNB, but the saturated result is written to the high
 * half of each destination element, preserving the low half of d
 * (hence load_dest below): bitsel with the low-half mask selects d's
 * low bits and n's shifted high bits.
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7270
/* Vector opcodes required by the gen_uqxtn[bt]_vec expansions below. */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

/*
 * UQXTNB: unsigned saturate to the half-width maximum.  umin against
 * the low-half mask both clamps and clears the high half in one op.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7304
/* UQXTNT: as UQXTNB, but merged into the high half of d (load_dest). */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7339
/* Vector opcodes required by the gen_sqxtun[bt]_vec expansions below. */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTUNB: signed input, unsigned saturation — clamp below at 0,
 * above at the half-width unsigned maximum.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7375
/* SQXTUNT: as SQXTUNB, but merged into the high half of d (load_dest). */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7412
/*
 * Common expansion for the shift-right-and-narrow group.  ops[] is
 * indexed by a->esz (the wide element size, [0, MO_32]); the decoder
 * guarantees 1 <= imm <= narrow element width, asserted here.
 */
static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
                               const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
7428
/*
 * SHRNB on a 64-bit lane: shift each wide element right, then keep
 * only the low (narrow) half of each element.
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

/* Per-element-size wrappers matching the GVecGen2i .fni8 signature. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

/* Vector form of the same operation. */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7464
/* SHRNB: shift right, narrow to the bottom halves. */
static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnb16_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnb32_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnb64_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7487
/*
 * SHRNT on a 64-bit lane: deposit the shifted narrow result into the
 * high half of each destination element, preserving the low half.
 * Left-shifting by (halfbits - shr) both performs the right shift and
 * positions the result in the top half in one operation.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/* MO_64: one element per lane, so a plain shift + deposit suffices. */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

/* Vector form: bitsel with the low-half mask merges d (low) and n (high). */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7526
/* SHRNT: shift right, narrow to the top halves (load_dest merges). */
static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnt16_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnt32_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnt64_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7552
/* RSHRNB/RSHRNT: rounding variants, out-of-line helpers only. */
static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnb_h },
        { .fno = gen_helper_sve2_rshrnb_s },
        { .fno = gen_helper_sve2_rshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnt_h },
        { .fno = gen_helper_sve2_rshrnt_s },
        { .fno = gen_helper_sve2_rshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7572
/*
 * SQSHRUNB: arithmetic shift right, then unsigned-saturate the signed
 * result (clamp at 0 below, half-width max above), bottom halves.
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7608
/* SQSHRUNT: as SQSHRUNB, but merged into the top halves (load_dest). */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7650
/* SQRSHRUNB/SQRSHRUNT: rounding variants, out-of-line helpers only. */
static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunb_h },
        { .fno = gen_helper_sve2_sqrshrunb_s },
        { .fno = gen_helper_sve2_sqrshrunb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunt_h },
        { .fno = gen_helper_sve2_sqrshrunt_s },
        { .fno = gen_helper_sve2_sqrshrunt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7670
743bb147
RH
/*
 * SQSHRNB: arithmetic shift right, signed-saturate to the narrow
 * range, then keep only the low halves.
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7710
/* SQSHRNT: as SQSHRNB, but merged into the top halves (load_dest). */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7755
/* SQRSHRNB/SQRSHRNT: rounding variants, out-of-line helpers only. */
static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnb_h },
        { .fno = gen_helper_sve2_sqrshrnb_s },
        { .fno = gen_helper_sve2_sqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnt_h },
        { .fno = gen_helper_sve2_sqrshrnt_s },
        { .fno = gen_helper_sve2_sqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7775
c13418da
RH
/*
 * UQSHRNB: logical shift right, unsigned-saturate to the half-width
 * maximum; umin against the low-half mask also clears the high half.
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7809
/* UQSHRNT: as UQSHRNB, but merged into the top halves (load_dest). */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7848
/* UQRSHRNB/UQRSHRNT: rounding variants, out-of-line helpers only. */
static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnb_h },
        { .fno = gen_helper_sve2_uqrshrnb_s },
        { .fno = gen_helper_sve2_uqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnt_h },
        { .fno = gen_helper_sve2_uqrshrnt_s },
        { .fno = gen_helper_sve2_uqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
b87dbeeb 7868
/*
 * Expand one SVE2 narrowing three-operand insn: a helper table indexed
 * by a->esz (byte invalid) plus the TRANS_FEAT boilerplate.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = { \
        NULL, gen_helper_sve2_##name##_h, \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
    }; \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
               name##_fns[a->esz], a, 0)

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
c3cd6766 7886
e0ae6ec3
SL
/* Gate a flag-setting predicated compare on the SVE2 feature bit. */
static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                               gen_helper_gvec_flags_4 *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_ppzz_flags(s, a, fn);
}

/* MATCH/NMATCH support only byte and halfword elements. */
#define DO_SVE2_PPZZ_MATCH(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_flags_4 * const fns[4] = { \
        gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h, \
        NULL, NULL \
    }; \
    return do_sve2_ppzz_flags(s, a, fns[a->esz]); \
}

DO_SVE2_PPZZ_MATCH(MATCH, match)
DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
7908
7d47ac94
SL
/* HISTCNT: word and doubleword only; fns[] indexed by esz - MO_32. */
static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
    };
    if (a->esz < 2) {
        return false;
    }
    return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
}

/* HISTSEG: byte elements only; other sizes decode to NULL -> reject. */
TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
           a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7d47ac94 7922
b87dbeeb
SL
/* Gate a predicated FP two-source operation on the SVE2 feature bit. */
static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                            gen_helper_gvec_4_ptr *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzz_fp(s, a, fn);
}

/* SVE2 FP pairwise operations: half/single/double; byte invalid. */
#define DO_SVE2_ZPZZ_FP(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve2_##name##_zpzz_h, \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d \
    }; \
    return do_sve2_zpzz_fp(s, a, fns[a->esz]); \
}

DO_SVE2_ZPZZ_FP(FADDP, faddp)
DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
DO_SVE2_ZPZZ_FP(FMINP, fminp)
bfc9307e
RH
7947
7948/*
7949 * SVE Integer Multiply-Add (unpredicated)
7950 */
7951
4f26756b
SL
7952static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
7953{
7954 gen_helper_gvec_4_ptr *fn;
7955
7956 switch (a->esz) {
7957 case MO_32:
7958 if (!dc_isar_feature(aa64_sve_f32mm, s)) {
7959 return false;
7960 }
7961 fn = gen_helper_fmmla_s;
7962 break;
7963 case MO_64:
7964 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
7965 return false;
7966 }
7967 fn = gen_helper_fmmla_d;
7968 break;
7969 default:
7970 return false;
7971 }
7972
7973 if (sve_access_check(s)) {
7974 unsigned vsz = vec_full_reg_size(s);
7975 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
7976 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
7977 vec_full_reg_offset(s, a->rn),
7978 vec_full_reg_offset(s, a->rm),
7979 vec_full_reg_offset(s, a->ra),
7980 status, vsz, vsz, 0, fn);
7981 tcg_temp_free_ptr(status);
7982 }
7983 return true;
7984}
7985
eeb4e84d
RH
/*
 * Saturating doubling multiply-add long.  Data argument selects the
 * input halves: 0 = bottom/bottom, 3 = top/top, 2 = bottom*top.
 */
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

/* Saturating doubling multiply-subtract long; same data encoding. */
static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)
8007
/* Saturating rounding doubling multiply-accumulate; all element sizes. */
static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

/* Saturating rounding doubling multiply-subtract; all element sizes. */
static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)
45a32e80 8021
eeb4e84d
RH
/*
 * Widening multiply-accumulate (signed/unsigned, add/subtract).
 * Tables indexed by a->esz (byte invalid); data 0 = bottom, 1 = top.
 */
static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)
d782d3ca 8057
5f425b92
RH
/* CMLA: complex multiply-add; a->rot is passed as helper data. */
static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* CDOT: complex dot product; word and doubleword only. */
static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* SQRDCMLAH: saturating rounding doubling complex multiply-add. */
static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
6a98cb2a
RH
8077
8078static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a)
8079{
8080 if (a->esz != 2 || !dc_isar_feature(aa64_sve_i8mm, s)) {
8081 return false;
8082 }
8083 if (sve_access_check(s)) {
8084 unsigned vsz = vec_full_reg_size(s);
8085 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
8086 vec_full_reg_offset(s, a->rn),
8087 vec_full_reg_offset(s, a->rm),
8088 vec_full_reg_offset(s, a->ra),
8089 vsz, vsz, 0, gen_helper_gvec_usdot_b);
8090 }
8091 return true;
8092}
b2bcd1be 8093
0ea3cdbf
RH
/* AESMC/AESIMC: in-place mix-columns; 'decrypt' selects the inverse. */
TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
           gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)

/* AESE/AESD share one helper; the data argument selects decrypt. */
TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, false)
TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, true)

/* SM4 round and key-schedule operations. */
TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4ekey, a, 0)
3358eb3f
RH
8106
/* RAX1: rotate-and-xor (SHA3); always operates on doubleword elements. */
static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2_sha3, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
    }
    return true;
}
5c1b7226
RH
8117
/* FCVTNT (single to half): narrow convert into the odd halves. */
static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
}

/* BFCVTNT: single to bfloat16 narrowing convert; gated on BF16. */
static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
}

/* FCVTNT (double to single). */
static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
}

/* FCVTLT (half to single): widening convert from the odd halves. */
static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
}

/* FCVTLT (single to double). */
static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
}

/* FCVTX: double to single using round-to-odd. */
static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
}

/* FCVTXNT: round-to-odd narrowing convert into the odd halves. */
static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
}
631be02e
SL
8173
8174static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
8175{
8176 static gen_helper_gvec_3_ptr * const fns[] = {
8177 NULL, gen_helper_flogb_h,
8178 gen_helper_flogb_s, gen_helper_flogb_d
8179 };
8180
8181 if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
8182 return false;
8183 }
8184 if (sve_access_check(s)) {
8185 TCGv_ptr status =
8186 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8187 unsigned vsz = vec_full_reg_size(s);
8188
8189 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
8190 vec_full_reg_offset(s, a->rn),
8191 pred_full_reg_offset(s, a->pg),
8192 status, vsz, vsz, 0, fns[a->esz]);
8193 tcg_temp_free_ptr(status);
8194 }
8195 return true;
8196}
50d102bd
SL
8197
8198static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
8199{
8200 if (!dc_isar_feature(aa64_sve2, s)) {
8201 return false;
8202 }
8203 if (sve_access_check(s)) {
8204 unsigned vsz = vec_full_reg_size(s);
8205 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8206 vec_full_reg_offset(s, a->rn),
8207 vec_full_reg_offset(s, a->rm),
8208 vec_full_reg_offset(s, a->ra),
8209 cpu_env, vsz, vsz, (sel << 1) | sub,
8210 gen_helper_sve2_fmlal_zzzw_s);
8211 }
8212 return true;
8213}
8214
8215static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8216{
8217 return do_FMLAL_zzzw(s, a, false, false);
8218}
8219
8220static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8221{
8222 return do_FMLAL_zzzw(s, a, false, true);
8223}
8224
8225static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8226{
8227 return do_FMLAL_zzzw(s, a, true, false);
8228}
8229
8230static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8231{
8232 return do_FMLAL_zzzw(s, a, true, true);
8233}
8234
8235static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
8236{
8237 if (!dc_isar_feature(aa64_sve2, s)) {
8238 return false;
8239 }
8240 if (sve_access_check(s)) {
8241 unsigned vsz = vec_full_reg_size(s);
8242 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8243 vec_full_reg_offset(s, a->rn),
8244 vec_full_reg_offset(s, a->rm),
8245 vec_full_reg_offset(s, a->ra),
8246 cpu_env, vsz, vsz,
8247 (a->index << 2) | (sel << 1) | sub,
8248 gen_helper_sve2_fmlal_zzxw_s);
8249 }
8250 return true;
8251}
8252
8253static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8254{
8255 return do_FMLAL_zzxw(s, a, false, false);
8256}
8257
8258static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8259{
8260 return do_FMLAL_zzxw(s, a, false, true);
8261}
8262
8263static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8264{
8265 return do_FMLAL_zzxw(s, a, true, false);
8266}
8267
8268static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8269{
8270 return do_FMLAL_zzxw(s, a, true, true);
8271}
2323c5ff 8272
eec05e4e
RH
/* Integer matrix multiply-accumulate (FEAT_I8MM), byte elements. */
TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_ummla_b, a, 0)
cb8657f7 8279
eec05e4e
RH
/* BFloat16 dot product (vector form), gated on FEAT_BF16. */
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
f3500a25
RH
/* BFloat16 dot product (indexed form), gated on FEAT_BF16. */
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)
81266a1f 8284
eec05e4e
RH
/* BFloat16 matrix multiply-accumulate, gated on FEAT_BF16. */
TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfmmla, a, 0)
5693887f
RH
8287
8288static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8289{
8290 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8291 return false;
8292 }
8293 if (sve_access_check(s)) {
8294 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8295 unsigned vsz = vec_full_reg_size(s);
8296
8297 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8298 vec_full_reg_offset(s, a->rn),
8299 vec_full_reg_offset(s, a->rm),
8300 vec_full_reg_offset(s, a->ra),
8301 status, vsz, vsz, sel,
8302 gen_helper_gvec_bfmlal);
8303 tcg_temp_free_ptr(status);
8304 }
8305 return true;
8306}
8307
8308static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8309{
8310 return do_BFMLAL_zzzw(s, a, false);
8311}
8312
8313static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8314{
8315 return do_BFMLAL_zzzw(s, a, true);
8316}
458d0ab6
RH
8317
8318static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
8319{
8320 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8321 return false;
8322 }
8323 if (sve_access_check(s)) {
8324 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8325 unsigned vsz = vec_full_reg_size(s);
8326
8327 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8328 vec_full_reg_offset(s, a->rn),
8329 vec_full_reg_offset(s, a->rm),
8330 vec_full_reg_offset(s, a->ra),
8331 status, vsz, vsz, (a->index << 1) | sel,
8332 gen_helper_gvec_bfmlal_idx);
8333 tcg_temp_free_ptr(status);
8334 }
8335 return true;
8336}
8337
8338static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8339{
8340 return do_BFMLAL_zzxw(s, a, false);
8341}
8342
8343static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8344{
8345 return do_BFMLAL_zzxw(s, a, true);
8346}