]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Introduce gen_gvec_fn_arg_zzi
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
49/*
50 * Helpers for extracting complex instruction fields.
51 */
52
53/* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
451e4ffd 56static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
57{
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60}
61
451e4ffd 62static int tszimm_shr(DisasContext *s, int x)
ccd841c3 63{
451e4ffd 64 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
65}
66
67/* See e.g. LSL (immediate, predicated). */
451e4ffd 68static int tszimm_shl(DisasContext *s, int x)
ccd841c3 69{
451e4ffd 70 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
71}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
c4e7c493
RH
84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
86 */
451e4ffd 87static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
88{
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
91}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    /* Predicates have one bit per vector byte, hence 1/8 of sve_len. */
    return s->sve_len >> 3;
}
116
516e246a
RH
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    /* Minimum 8 bytes; beyond that, a multiple of 16 bytes. */
    return size <= 8 ? 8 : QEMU_ALIGN_UP(size, 16);
}
133
134static int pred_gvec_reg_size(DisasContext *s)
135{
136 return size_for_gvec(pred_full_reg_size(s));
137}
138
40e32e5a 139/* Invoke an out-of-line helper on 2 Zregs. */
c5edf07d 140static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
40e32e5a
RH
141 int rd, int rn, int data)
142{
c5edf07d
RH
143 if (fn == NULL) {
144 return false;
145 }
146 if (sve_access_check(s)) {
147 unsigned vsz = vec_full_reg_size(s);
148 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
149 vec_full_reg_offset(s, rn),
150 vsz, vsz, data, fn);
151 }
152 return true;
40e32e5a
RH
153}
154
e645d1a1 155/* Invoke an out-of-line helper on 3 Zregs. */
913a8a00 156static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
e645d1a1
RH
157 int rd, int rn, int rm, int data)
158{
913a8a00
RH
159 if (fn == NULL) {
160 return false;
161 }
162 if (sve_access_check(s)) {
163 unsigned vsz = vec_full_reg_size(s);
164 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
165 vec_full_reg_offset(s, rn),
166 vec_full_reg_offset(s, rm),
167 vsz, vsz, data, fn);
168 }
169 return true;
e645d1a1
RH
170}
171
84a272f5
RH
172static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
173 arg_rrr_esz *a, int data)
174{
175 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
176}
177
38650638 178/* Invoke an out-of-line helper on 4 Zregs. */
7ad416b1 179static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
38650638
RH
180 int rd, int rn, int rm, int ra, int data)
181{
7ad416b1
RH
182 if (fn == NULL) {
183 return false;
184 }
185 if (sve_access_check(s)) {
186 unsigned vsz = vec_full_reg_size(s);
187 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
188 vec_full_reg_offset(s, rn),
189 vec_full_reg_offset(s, rm),
190 vec_full_reg_offset(s, ra),
191 vsz, vsz, data, fn);
192 }
193 return true;
38650638
RH
194}
195
cab79ac9
RH
196static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
197 arg_rrrr_esz *a, int data)
198{
199 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
200}
201
e82d3536
RH
202static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
203 arg_rrxr_esz *a)
204{
205 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
206}
207
96a461f7 208/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
8fb27a21 209static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
96a461f7
RH
210 int rd, int rn, int pg, int data)
211{
8fb27a21
RH
212 if (fn == NULL) {
213 return false;
214 }
215 if (sve_access_check(s)) {
216 unsigned vsz = vec_full_reg_size(s);
217 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
218 vec_full_reg_offset(s, rn),
219 pred_full_reg_offset(s, pg),
220 vsz, vsz, data, fn);
221 }
222 return true;
96a461f7
RH
223}
224
b051809a
RH
225static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
226 arg_rpr_esz *a, int data)
227{
228 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
229}
230
afa2529c
RH
231static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
232 arg_rpri_esz *a)
233{
234 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
235}
b051809a 236
36cbb7a8 237/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
2a753d1e 238static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
36cbb7a8
RH
239 int rd, int rn, int rm, int pg, int data)
240{
2a753d1e
RH
241 if (fn == NULL) {
242 return false;
243 }
244 if (sve_access_check(s)) {
245 unsigned vsz = vec_full_reg_size(s);
246 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
247 vec_full_reg_offset(s, rn),
248 vec_full_reg_offset(s, rm),
249 pred_full_reg_offset(s, pg),
250 vsz, vsz, data, fn);
251 }
252 return true;
36cbb7a8 253}
f7d79c41 254
312016c9
RH
255static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
256 arg_rprr_esz *a, int data)
257{
258 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
259}
260
faf915e2
RH
261/* Invoke a vector expander on two Zregs and an immediate. */
262static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
263 int esz, int rd, int rn, uint64_t imm)
264{
265 if (gvec_fn == NULL) {
266 return false;
267 }
268 if (sve_access_check(s)) {
269 unsigned vsz = vec_full_reg_size(s);
270 gvec_fn(esz, vec_full_reg_offset(s, rd),
271 vec_full_reg_offset(s, rn), imm, vsz, vsz);
272 }
273 return true;
274}
275
ada378f0
RH
276static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
277 arg_rri_esz *a)
278{
279 if (a->esz < 0) {
280 /* Invalid tsz encoding -- see tszimm_esz. */
281 return false;
282 }
283 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
284}
285
39eea561 286/* Invoke a vector expander on three Zregs. */
50f6db5f 287static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
28c4da31 288 int esz, int rd, int rn, int rm)
38388f7e 289{
50f6db5f
RH
290 if (gvec_fn == NULL) {
291 return false;
292 }
293 if (sve_access_check(s)) {
294 unsigned vsz = vec_full_reg_size(s);
295 gvec_fn(esz, vec_full_reg_offset(s, rd),
296 vec_full_reg_offset(s, rn),
297 vec_full_reg_offset(s, rm), vsz, vsz);
298 }
299 return true;
38388f7e
RH
300}
301
cd54bbe6
RH
302static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
303 arg_rrr_esz *a)
304{
305 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
306}
307
911cdc6d 308/* Invoke a vector expander on four Zregs. */
189876af
RH
309static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
310 arg_rrrr_esz *a)
911cdc6d 311{
189876af
RH
312 if (gvec_fn == NULL) {
313 return false;
314 }
315 if (sve_access_check(s)) {
316 unsigned vsz = vec_full_reg_size(s);
317 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
318 vec_full_reg_offset(s, a->rn),
319 vec_full_reg_offset(s, a->rm),
320 vec_full_reg_offset(s, a->ra), vsz, vsz);
321 }
322 return true;
911cdc6d
RH
323}
324
39eea561
RH
325/* Invoke a vector move on two Zregs. */
326static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 327{
f7d79c41 328 if (sve_access_check(s)) {
5f730621
RH
329 unsigned vsz = vec_full_reg_size(s);
330 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
331 vec_full_reg_offset(s, rn), vsz, vsz);
f7d79c41
RH
332 }
333 return true;
38388f7e
RH
334}
335
d9d78dcc
RH
336/* Initialize a Zreg with replications of a 64-bit immediate. */
337static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
338{
339 unsigned vsz = vec_full_reg_size(s);
8711e71f 340 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
d9d78dcc
RH
341}
342
516e246a 343/* Invoke a vector expander on three Pregs. */
dd81a8d7
RH
344static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
345 int rd, int rn, int rm)
516e246a 346{
dd81a8d7
RH
347 unsigned psz = pred_gvec_reg_size(s);
348 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
349 pred_full_reg_offset(s, rn),
350 pred_full_reg_offset(s, rm), psz, psz);
516e246a
RH
351}
352
353/* Invoke a vector move on two Pregs. */
354static bool do_mov_p(DisasContext *s, int rd, int rn)
355{
d0b2df5a
RH
356 if (sve_access_check(s)) {
357 unsigned psz = pred_gvec_reg_size(s);
358 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
359 pred_full_reg_offset(s, rn), psz, psz);
360 }
361 return true;
516e246a
RH
362}
363
9e18d7a6
RH
/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    /* cpu_NF gets the raw helper result; ZF is taken from bit 1,
       CF from bit 0, and VF is always cleared.  */
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    /* Pass pointers into env at the given predicate and governing
       predicate offsets.  */
    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    /* The pointer temps are dead once the helper call is emitted. */
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,   /* MO_8, MO_16 */
    0x1111111111111111ull, 0x0101010101010101ull    /* MO_32, MO_64 */
};
405
39eea561
RH
/*
 *** SVE Logical - Unpredicated Group
 */

/* Whole-register bitwise logic maps directly onto generic gvec ops. */
TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
d1822297 414
e6eba6e5
RH
415static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
416{
417 TCGv_i64 t = tcg_temp_new_i64();
418 uint64_t mask = dup_const(MO_8, 0xff >> sh);
419
420 tcg_gen_xor_i64(t, n, m);
421 tcg_gen_shri_i64(d, t, sh);
422 tcg_gen_shli_i64(t, t, 8 - sh);
423 tcg_gen_andi_i64(d, d, mask);
424 tcg_gen_andi_i64(t, t, ~mask);
425 tcg_gen_or_i64(d, d, t);
426 tcg_temp_free_i64(t);
427}
428
429static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
430{
431 TCGv_i64 t = tcg_temp_new_i64();
432 uint64_t mask = dup_const(MO_16, 0xffff >> sh);
433
434 tcg_gen_xor_i64(t, n, m);
435 tcg_gen_shri_i64(d, t, sh);
436 tcg_gen_shli_i64(t, t, 16 - sh);
437 tcg_gen_andi_i64(d, d, mask);
438 tcg_gen_andi_i64(t, t, ~mask);
439 tcg_gen_or_i64(d, d, t);
440 tcg_temp_free_i64(t);
441}
442
443static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
444{
445 tcg_gen_xor_i32(d, n, m);
446 tcg_gen_rotri_i32(d, d, sh);
447}
448
449static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
450{
451 tcg_gen_xor_i64(d, n, m);
452 tcg_gen_rotri_i64(d, d, sh);
453}
454
455static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
456 TCGv_vec m, int64_t sh)
457{
458 tcg_gen_xor_vec(vece, d, n, m);
459 tcg_gen_rotri_vec(vece, d, d, sh);
460}
461
/*
 * Expand XAR (exclusive-or and rotate right by immediate) over a
 * vector region, choosing the per-element-size expansion from ops[].
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    /* A rotate by esize is the same as a rotate by 0. */
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    /* Reject invalid tsz encodings and pre-SVE2 cpus. */
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}
518
911cdc6d
RH
/* d = n ^ m ^ k */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

/* Three-way xor; bitwise, so expanded at MO_64 for any element size. */
static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)

/* d = n ^ (m & ~k) */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
911cdc6d
RH
583
/*
 * BSL variants below deliberately clobber their n/m inputs: the gvec
 * expansion framework provides scratch copies for fni8/fniv callbacks.
 */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /* d = (~n & k) | (m & ~k) */
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* bitsel(k, ~n, m) computes the same selection directly. */
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       =         | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* bitsel(k, n, ~m) computes the same selection directly. */
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)

static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /* d = ~((n & k) | (m & ~k)) */
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
911cdc6d 691
fea98f9c
RH
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

/* Plain and saturating add/sub map onto generic gvec expanders. */
TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
fea98f9c 702
f97cfd59
RH
703/*
704 *** SVE Integer Arithmetic - Binary Predicated Group
705 */
706
a2103582
RH
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    /* esz selects the per-element-size out-of-line helper. */
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
718
8e7acb24
RH
/*
 * Expand a predicated binary integer operation NAME, dispatching by
 * element size to the gen_helper_<name>_zpzz_{b,h,s,d} helpers.
 */
#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \
    }; \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

/* Division exists only for 32- and 64-bit elements; note the NULLs. */
static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)
f97cfd59 759
3a7be554 760static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
a2103582 761{
68cc4ee3 762 return do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
a2103582 763}
d3fe4a29 764
afac6d04
RH
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

/*
 * Expand a predicated unary operation NAME, dispatching by element
 * size to the gen_helper_<name>_{b,h,s,d} helpers.
 */
#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = { \
        gen_helper_##name##_b, gen_helper_##name##_h, \
        gen_helper_##name##_s, gen_helper_##name##_d, \
    }; \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

/* FABS/FNEG have no byte-sized form; NULL rejects esz == 0. */
static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL, gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL, gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

/* Extensions are valid only for elements wider than the source;
   narrower element sizes are NULL. */
static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL, gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL, gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

/* Word extensions require 64-bit elements. */
TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
afac6d04 823
047cec97
RH
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/*
 * Reduce Zn under predicate Pg to a scalar via an out-of-line helper,
 * writing the result with write_fp_dreg.  Returns false for an
 * unallocated encoding (fn == NULL).
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    /* Pass pointers into env for the vector and predicate data. */
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    /* The pointer temps die with the call; temp holds the result. */
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
859
/* Expand a predicated reduction NAME over all four element sizes. */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
{ \
    static gen_helper_gvec_reduc * const fns[4] = { \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    }; \
    return do_vpz_ool(s, a, fns[a->esz]); \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV has no 64-bit element form; note the NULL entry. */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
890
ccd841c3
RH
891/*
892 *** SVE Shift by Immediate - Predicated Group
893 */
894
60245996
RH
895/*
896 * Copy Zn into Zd, storing zeros into inactive elements.
897 * If invert, store zeros into the active elements.
ccd841c3 898 */
60245996
RH
899static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
900 int esz, bool invert)
ccd841c3 901{
60245996
RH
902 static gen_helper_gvec_3 * const fns[4] = {
903 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
904 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
ccd841c3 905 };
8fb27a21 906 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
ccd841c3
RH
907}
908
3a7be554 909static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
910{
911 static gen_helper_gvec_3 * const fns[4] = {
912 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
913 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
914 };
915 if (a->esz < 0) {
916 /* Invalid tsz encoding -- see tszimm_esz. */
917 return false;
918 }
919 /* Shift by element size is architecturally valid. For
920 arithmetic right-shift, it's the same as by one less. */
921 a->imm = MIN(a->imm, (8 << a->esz) - 1);
afa2529c 922 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
923}
924
3a7be554 925static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
926{
927 static gen_helper_gvec_3 * const fns[4] = {
928 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
929 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
930 };
931 if (a->esz < 0) {
932 return false;
933 }
934 /* Shift by element size is architecturally valid.
935 For logical shifts, it is a zeroing operation. */
936 if (a->imm >= (8 << a->esz)) {
60245996 937 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3 938 } else {
afa2529c 939 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
940 }
941}
942
3a7be554 943static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
944{
945 static gen_helper_gvec_3 * const fns[4] = {
946 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
947 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
948 };
949 if (a->esz < 0) {
950 return false;
951 }
952 /* Shift by element size is architecturally valid.
953 For logical shifts, it is a zeroing operation. */
954 if (a->imm >= (8 << a->esz)) {
60245996 955 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3 956 } else {
afa2529c 957 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
958 }
959}
960
3a7be554 961static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
962{
963 static gen_helper_gvec_3 * const fns[4] = {
964 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
965 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
966 };
967 if (a->esz < 0) {
968 return false;
969 }
970 /* Shift by element size is architecturally valid. For arithmetic
971 right shift for division, it is a zeroing operation. */
972 if (a->imm >= (8 << a->esz)) {
60245996 973 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3 974 } else {
afa2529c 975 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
976 }
977}
978
4df37e41
RH
/* SVE2 saturating/rounding shift by immediate, predicated.
   The esz < 0 guard rejects an invalid tsz encoding (see tszimm_esz). */
static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
a5421b54 1013
fe7f8dfb
RH
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/*
 * Predicated shifts by a wide (64-bit element) shift-amount vector.
 * The MO_64 slot has no wide-shift helper and is NULL.
 */
#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
        gen_helper_sve_##name##_zpzw_s, NULL                          \
    };                                                                \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,          \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1031
d9d78dcc
RH
1032/*
1033 *** SVE Bitwise Shift - Unpredicated Group
1034 */
1035
1036static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
1037 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
1038 int64_t, uint32_t, uint32_t))
1039{
1040 if (a->esz < 0) {
1041 /* Invalid tsz encoding -- see tszimm_esz. */
1042 return false;
1043 }
1044 if (sve_access_check(s)) {
1045 unsigned vsz = vec_full_reg_size(s);
1046 /* Shift by element size is architecturally valid. For
1047 arithmetic right-shift, it's the same as by one less.
1048 Otherwise it is a zeroing operation. */
1049 if (a->imm >= 8 << a->esz) {
1050 if (asr) {
1051 a->imm = (8 << a->esz) - 1;
1052 } else {
1053 do_dupi_z(s, a->rd, 0);
1054 return true;
1055 }
1056 }
1057 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
1058 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
1059 }
1060 return true;
1061}
1062
/* ASR (immediate, unpredicated). */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

/* LSR (immediate, unpredicated). */
static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

/* LSL (immediate, unpredicated). */
static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
1077
/*
 * Unpredicated shifts by a wide (64-bit element) shift-amount vector.
 * The MO_64 slot is NULL.
 */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {            \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
        gen_helper_sve_##name##_zzw_s, NULL                           \
    };                                                                \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                  \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1091
96a36e4a
RH
1092/*
1093 *** SVE Integer Multiply-Add Group
1094 */
1095
1096static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1097 gen_helper_gvec_5 *fn)
1098{
1099 if (sve_access_check(s)) {
1100 unsigned vsz = vec_full_reg_size(s);
1101 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1102 vec_full_reg_offset(s, a->ra),
1103 vec_full_reg_offset(s, a->rn),
1104 vec_full_reg_offset(s, a->rm),
1105 pred_full_reg_offset(s, a->pg),
1106 vsz, vsz, 0, fn);
1107 }
1108 return true;
1109}
1110
/* Predicated integer multiply-add / multiply-subtract, per element size. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
1125
9a56c9c3
RH
1126/*
1127 *** SVE Index Generation Group
1128 */
1129
/*
 * Expand INDEX: zd[i] = start + i * incr.  The 64-bit element size
 * takes the operands as i64 directly; smaller element sizes truncate
 * both operands to i32 first.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        /* Narrow the 64-bit operands for the b/h/s helpers. */
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
}
1159
/* INDEX (immediate start, immediate increment). */
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_constant_i64(a->imm1);
        TCGv_i64 incr = tcg_constant_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/* INDEX (immediate start, register increment). */
static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_constant_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/* INDEX (register start, immediate increment). */
static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_constant_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/* INDEX (register start, register increment). */
static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
1199
96f922cc
RH
1200/*
1201 *** SVE Stack Allocation Group
1202 */
1203
3a7be554 1204static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1205{
5de56742
AC
1206 if (sve_access_check(s)) {
1207 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1208 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1209 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1210 }
96f922cc
RH
1211 return true;
1212}
1213
3a7be554 1214static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1215{
5de56742
AC
1216 if (sve_access_check(s)) {
1217 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1218 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1219 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1220 }
96f922cc
RH
1221 return true;
1222}
1223
3a7be554 1224static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1225{
5de56742
AC
1226 if (sve_access_check(s)) {
1227 TCGv_i64 reg = cpu_reg(s, a->rd);
1228 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1229 }
96f922cc
RH
1230 return true;
1231}
1232
4b242d9c
RH
1233/*
1234 *** SVE Compute Vector Address Group
1235 */
1236
/* Expand one of the four ADR forms, forwarding a->imm as the
 * descriptor data for the out-of-line helper.
 */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1261
0762cd42
RH
1262/*
1263 *** SVE Integer Misc - Unpredicated Group
1264 */
1265
0ea3cdbf
RH
/* FEXPA -- no byte-sized variant, so the MO_8 slot is NULL. */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)

/* FTSSEL -- no byte-sized variant, so the MO_8 slot is NULL. */
static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
a1f233f2 1278
516e246a
RH
1279/*
1280 *** SVE Predicate Logical Operations Group
1281 */
1282
/*
 * Expand a predicate logical operation, optionally setting the NZCV
 * flags from the result under the governing predicate (a->s).
 * The non-flag-setting path expands gvec_op directly.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        /* No flags to set: just expand the operation. */
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1338
/* pd = (pn & pm) & pg, on a 64-bit predicate chunk. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of the above. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        /* Without flag setting, several operand aliases simplify. */
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                /* pn & pn & pn -> plain move. */
                do_mov_p(s, a->rd, a->rn);
            } else {
                /* pn & pn & pg -> pn & pg. */
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            /* The guard equals one operand: two-input AND suffices. */
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
1379
/* pd = (pn & ~pm) & pg, on a 64-bit predicate chunk. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of the above. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn) {
        /* pg == pn: (pn & ~pm) & pn == pn & ~pm. */
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}
1410
/* pd = (pn ^ pm) & pg, on a 64-bit predicate chunk. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of the above. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1434
3a7be554 1435static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1436{
516e246a
RH
1437 if (a->s) {
1438 return false;
516e246a 1439 }
d4bc6232
RH
1440 if (sve_access_check(s)) {
1441 unsigned psz = pred_gvec_reg_size(s);
1442 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1443 pred_full_reg_offset(s, a->pg),
1444 pred_full_reg_offset(s, a->rn),
1445 pred_full_reg_offset(s, a->rm), psz, psz);
1446 }
1447 return true;
516e246a
RH
1448}
1449
/* pd = (pn | pm) & pg, on a 64-bit predicate chunk. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of the above. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        /* (pn | pn) & pn == pn: plain move. */
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1477
/* pd = (pn | ~pm) & pg, on a 64-bit predicate chunk. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of the above. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1501
/* pd = ~(pn | pm) & pg, on a 64-bit predicate chunk. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Vector form of the above. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1525
/* pd = ~(pn & pm) & pg, on a 64-bit predicate chunk. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Vector form of the above. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1549
9e18d7a6
RH
1550/*
1551 *** SVE Predicate Misc Group
1552 */
1553
/*
 * PTEST: set the flags from pn under governing predicate pg.
 * A single 64-bit word is handled inline; larger predicates go
 * through do_predtest.
 */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1577
028e2a7b
RH
/*
 * See the ARM pseudocode DecodePredCount: map a 5-bit pattern to
 * the number of active elements for the given vector size and
 * element size.  Fixed-count patterns yield 0 when the vector is
 * too short; unallocated patterns always yield 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {
        /* POW2: largest power of two that fits. */
        return pow2floor(elements);
    }
    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1..VL8: the pattern is the count. */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256. */
        bound = 16 << (pattern - 0x9);
    } else if (pattern == 0x1d) {
        /* MUL4: largest multiple of 4. */
        return elements - elements % 4;
    } else if (pattern == 0x1e) {
        /* MUL3: largest multiple of 3. */
        return elements - elements % 3;
    } else if (pattern == 0x1f) {
        /* ALL */
        return elements;
    } else {
        /* #uimm5: unallocated pattern. */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
1615
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Trim the final, partially-set 64-bit word. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate is one word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* Uniform contents: try a single gvec dup over the register. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Store the fully-set words, then the partial word, then zeros. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1695
/* PTRUE, PTRUES: set the predicate per pattern, optionally with flags. */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR (unpredicated): copy the FFR to pd. */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

/* WRFFR: copy pn to the FFR. */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1734
/*
 * Expand PFIRST/PNEXT via an out-of-line helper that returns the
 * flag result in an i32, then fold that into NZCV.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    /* Pack predicate size and element size into the descriptor. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1773
24e82e68
RH
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisions we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtraction can only underflow: clamp from below. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Addition can only overflow: clamp from above. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1802
/* Similarly with 64-bit values.  The second operand (val) is known
 * to be positive, as for do_sat_addsub_32 above.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: select 0 on borrow (reg < val). */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: select all-ones on carry (sum < reg). */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1848
/* Similarly with a vector and a scalar operand.  The helpers take a
 * positive addend; subtraction is expressed by negating the operand
 * (MO_8..MO_32), or by using a dedicated subtract helper (MO_64
 * unsigned, where negation cannot represent the operand).
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
1932
3a7be554 1933static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
1934{
1935 if (sve_access_check(s)) {
1936 unsigned fullsz = vec_full_reg_size(s);
1937 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1938 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1939 }
1940 return true;
1941}
1942
3a7be554 1943static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1944{
1945 if (sve_access_check(s)) {
1946 unsigned fullsz = vec_full_reg_size(s);
1947 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1948 int inc = numelem * a->imm * (a->d ? -1 : 1);
1949 TCGv_i64 reg = cpu_reg(s, a->rd);
1950
1951 tcg_gen_addi_i64(reg, reg, inc);
1952 }
1953 return true;
1954}
1955
3a7be554 1956static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1957{
1958 if (!sve_access_check(s)) {
1959 return true;
1960 }
1961
1962 unsigned fullsz = vec_full_reg_size(s);
1963 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1964 int inc = numelem * a->imm;
1965 TCGv_i64 reg = cpu_reg(s, a->rd);
1966
1967 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1968 if (inc == 0) {
1969 if (a->u) {
1970 tcg_gen_ext32u_i64(reg, reg);
1971 } else {
1972 tcg_gen_ext32s_i64(reg, reg);
1973 }
1974 } else {
d681f125 1975 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
1976 }
1977 return true;
1978}
1979
3a7be554 1980static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1981{
1982 if (!sve_access_check(s)) {
1983 return true;
1984 }
1985
1986 unsigned fullsz = vec_full_reg_size(s);
1987 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1988 int inc = numelem * a->imm;
1989 TCGv_i64 reg = cpu_reg(s, a->rd);
1990
1991 if (inc != 0) {
d681f125 1992 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
1993 }
1994 return true;
1995}
1996
3a7be554 1997static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
1998{
1999 if (a->esz == 0) {
2000 return false;
2001 }
2002
2003 unsigned fullsz = vec_full_reg_size(s);
2004 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2005 int inc = numelem * a->imm;
2006
2007 if (inc != 0) {
2008 if (sve_access_check(s)) {
24e82e68
RH
2009 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
2010 vec_full_reg_offset(s, a->rn),
d681f125
RH
2011 tcg_constant_i64(a->d ? -inc : inc),
2012 fullsz, fullsz);
24e82e68
RH
2013 }
2014 } else {
2015 do_mov_z(s, a->rd, a->rn);
2016 }
2017 return true;
2018}
2019
3a7be554 2020static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2021{
2022 if (a->esz == 0) {
2023 return false;
2024 }
2025
2026 unsigned fullsz = vec_full_reg_size(s);
2027 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2028 int inc = numelem * a->imm;
2029
2030 if (inc != 0) {
2031 if (sve_access_check(s)) {
d681f125
RH
2032 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
2033 tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2034 }
2035 } else {
2036 do_mov_z(s, a->rd, a->rn);
2037 }
2038 return true;
2039}
2040
e1fa1164
RH
2041/*
2042 *** SVE Bitwise Immediate Group
2043 */
2044
2045static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2046{
2047 uint64_t imm;
2048 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2049 extract32(a->dbm, 0, 6),
2050 extract32(a->dbm, 6, 6))) {
2051 return false;
2052 }
faf915e2 2053 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
e1fa1164
RH
2054}
2055
15a314da
RH
/* AND/ORR/EOR with a logic-immediate bitmask operand. */
TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
e1fa1164 2059
3a7be554 2060static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2061{
2062 uint64_t imm;
2063 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2064 extract32(a->dbm, 0, 6),
2065 extract32(a->dbm, 6, 6))) {
2066 return false;
2067 }
2068 if (sve_access_check(s)) {
2069 do_dupi_z(s, a->rd, imm);
2070 }
2071 return true;
2072}
2073
f25a2361
RH
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies. This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 *
 * Copies VAL into the active elements of Zd, merging with Zn for
 * the inactive elements, via the per-element-size helper.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    /* Pass env-relative pointers to the destination, source, and guard. */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
}

/* FCPY: merging copy of an expanded VFP immediate. */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    /* No FP immediate encoding for byte elements. */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}

/* CPY (immediate), merging. */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    /*
     * Insn bit 13 selects the shifted-immediate form, which is
     * reserved for byte elements.
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    }
    return true;
}

/* CPY (immediate), zeroing: inactive elements are cleared. */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    /* As for CPY_m_i, the shifted form is reserved for byte elements. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2149
b94f8f60
RH
/*
 *** SVE Permute Extract Group
 */

/*
 * Expand EXT: extract a vector from the concatenation Zm:Zn starting
 * at byte IMM.  An out-of-range IMM is treated as 0 (copy of Zn).
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;          /* bytes taken from Zn */
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper for awkward cases. */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
}

/* SVE2 constructive EXT: the second source is the pair register Zn+1. */
static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
}
2196
30562ab7
RH
2197/*
2198 *** SVE Permute - Unpredicated Group
2199 */
2200
3a7be554 2201static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2202{
2203 if (sve_access_check(s)) {
2204 unsigned vsz = vec_full_reg_size(s);
2205 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2206 vsz, vsz, cpu_reg_sp(s, a->rn));
2207 }
2208 return true;
2209}
2210
/*
 * DUP (indexed): broadcast element [index] of Zn into Zd.
 * The imm field encodes both the element size (trailing zeros)
 * and the index (remaining bits).
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    /* imm with all of bits [4:0] clear has no encodable element size. */
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* Element size is the position of the lowest set bit ... */
        esz = ctz32(a->imm);
        /* ... and the index is the bits above it. */
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2237
/*
 * Common expansion for INSR: hand the 64-bit scalar VAL to the
 * per-element-size helper operating on Zd/Zn.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
}

/* INSR (SIMD&FP scalar): insert the low 64 bits of Vm. */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* INSR (scalar): insert general register Xm. */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
2277
0ea3cdbf
RH
/* REV (vector): reverse the order of elements, per element size. */
static gen_helper_gvec_2 * const rev_fns[4] = {
    gen_helper_sve_rev_b, gen_helper_sve_rev_h,
    gen_helper_sve_rev_s, gen_helper_sve_rev_d
};
TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)

/* TBL: table lookup, single source register. */
static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
    gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
    gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
};
TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)

/* SVE2 TBL: table lookup over the register pair Zn:Zn+1. */
static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
    gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
    gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
};
TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
           a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)

/* SVE2 TBX: table lookup with merging for out-of-range indices. */
static gen_helper_gvec_3 * const tbx_fns[4] = {
    gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
    gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
};
TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)

/*
 * SUNPKLO/HI, UUNPKLO/HI: widen the low (a->h == 0) or high half
 * of Zn to double-width elements, sign- (a->u == 0) or zero-extending.
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    /* There is no unpack to a smaller-than-byte element. */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The high-half form reads Zn starting at its midpoint. */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2324
d731d8cb
RH
/*
 *** SVE Permute - Predicates Group
 */

/*
 * Common expansion for 3-operand predicate permutes (ZIP/UZP/TRN on
 * predicate registers).  HIGH_ODD is passed to the helper via the
 * descriptor DATA field to select the high/odd variant.
 * Predicate sizes may be smaller than simd_desc allows, so the
 * descriptor is built by hand with PREDDESC fields.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    uint32_t desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));

    fn(t_d, t_n, t_m, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    return true;
}

/*
 * Common expansion for 2-operand predicate permutes (REV_p, PUNPK).
 * As above, the descriptor is built by hand from PREDDESC fields.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(t_d, t_n, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2384
/* Trans wrappers: select high/odd half and helper for each permute. */

static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
2429
234b48e9
RH
2430/*
2431 *** SVE Permute - Interleaving Group
2432 */
2433
2434static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2435{
2436 static gen_helper_gvec_3 * const fns[4] = {
2437 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2438 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2439 };
2440
2441 if (sve_access_check(s)) {
2442 unsigned vsz = vec_full_reg_size(s);
2443 unsigned high_ofs = high ? vsz / 2 : 0;
2444 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2445 vec_full_reg_offset(s, a->rn) + high_ofs,
2446 vec_full_reg_offset(s, a->rm) + high_ofs,
2447 vsz, vsz, 0, fns[a->esz]);
2448 }
2449 return true;
2450}
2451
static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}

/* F64MM ZIP1/ZIP2 with 128-bit (quadword) elements. */
static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /*
         * The high half is aligned down to a multiple of 32 bytes
         * (a pair of quadwords) before halving.
         */
        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, gen_helper_sve2_zip_q);
    }
    return true;
}

static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, false);
}

static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, true);
}
2487
234b48e9
RH
/* UZP1/UZP2: concatenate even/odd elements of Zn:Zm. */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

/* The DATA argument is the byte offset of the odd stream (0 for UZP1). */
TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 1 << a->esz)

/* F64MM quadword UZP: the odd stream starts 16 bytes in. */
TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 0)
TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 16)

/* TRN1/TRN2: interleave even/odd element pairs of Zn and Zm. */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 0)
TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 0)
TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 16)

/*
 *** SVE Permute Vector - Predicated Group
 */

/* COMPACT: only word and doubleword element sizes are defined. */
static gen_helper_gvec_3 * const compact_fns[4] = {
    NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
};
TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
3ca879ae 2526
ef23cb72
RH
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size. This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_p);
}

/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-2 size: wrap with a simple mask. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select 0 when the incremented offset reaches vsz. */
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* Masking maps the -(1 << esz) "not found" value to vsz - esz. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Select the final element's offset when LAST is negative. */
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}
2579
2580/* Load an unsigned element of ESZ from BASE+OFS. */
2581static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2582{
2583 TCGv_i64 r = tcg_temp_new_i64();
2584
2585 switch (esz) {
2586 case 0:
2587 tcg_gen_ld8u_i64(r, base, ofs);
2588 break;
2589 case 1:
2590 tcg_gen_ld16u_i64(r, base, ofs);
2591 break;
2592 case 2:
2593 tcg_gen_ld32u_i64(r, base, ofs);
2594 break;
2595 case 3:
2596 tcg_gen_ld_i64(r, base, ofs);
2597 break;
2598 default:
2599 g_assert_not_reached();
2600 }
2601 return r;
2602}
2603
/* Load an unsigned element of ESZ from RM[LAST].
 * Note: on big-endian hosts this clobbers LAST (xor for byte ordering).
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering. See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2629
/* Compute CLAST for a Zreg: broadcast the (conditionally advanced)
 * last active element of Zm into Zd; if no element is active, Zd
 * takes the value of Zn (the MOVPRFX source).
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* A local temp survives the branch emitted below. */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    /* CLASTA uses the element after the last active one. */
    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}
2687
/* Compute CLAST for a scalar: REG_VAL receives the selected element
 * of Zm if any predicate element is active, else keeps its old value.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage. We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
                        ele, reg_val);

    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}

/* Compute CLAST for a Vreg: operate on element 0 of Vd. */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        /* write_fp_dreg zero-extends into the full vector register. */
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}
2744
/* Compute CLAST for a Xreg: the general register is first narrowed
 * (zero-extended) to the element size, then conditionally updated.
 */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        /* Already 64 bits wide; nothing to extend. */
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, true);
}
2784
/* Compute LAST for a scalar: unlike CLAST, a "not found" result wraps
 * to an in-range element rather than preserving the old value.
 * Caller frees the returned temporary.
 */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        /* LASTB: map "not found" to the final element. */
        wrap_last_active(s, last, esz);
    } else {
        /* LASTA: advance past the last active element, wrapping to 0. */
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}

/* Compute LAST for a Vreg. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, true);
}

/* Compute LAST for a Xreg. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, true);
}
2845
/* CPY (scalar): merging copy of general register Xn/SP. */
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/* CPY (SIMD&FP scalar): merging copy of element 0 of Vn. */
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
2864
817bd5c9
RH
/* REVB: byte-reverse within each element; not defined for bytes. */
static gen_helper_gvec_3 * const revb_fns[4] = {
    NULL, gen_helper_sve_revb_h,
    gen_helper_sve_revb_s, gen_helper_sve_revb_d,
};
TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)

/* REVH: halfword-reverse; defined only for word and doubleword. */
static gen_helper_gvec_3 * const revh_fns[4] = {
    NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
};
TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)

/* REVW: word-reverse; defined only for doubleword elements. */
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)

/* SPLICE: concatenate the active elements of Zn with elements of Zm. */
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
    return gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                             a->rd, a->rn, a->rm, a->pg, a->esz);
}

/* SVE2 constructive SPLICE: the second source is the pair register Zn+1. */
static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                             a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
}
2893
757f9cff
RH
/*
 *** SVE Integer Compare - Vectors Group
 */

/*
 * Common expansion for predicated vector-vector compares.  The helper
 * writes the predicate result and returns the NZCV flags, which are
 * then committed via do_pred_flags.  A NULL helper (unsupported
 * element size) decodes as an undefined instruction.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    /* Commit the flags result from the helper. */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

/* Same-width (zz) compares: all four element sizes are supported. */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_flags_4 * const fns[4] = { \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
    }; \
    return do_ppzz_flags(s, a, fns[a->esz]); \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/* Wide (zw) compares against 64-bit Zm elements: no doubleword form. */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_flags_4 * const fns[4] = { \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
        gen_helper_sve_##name##_ppzw_s, NULL \
    }; \
    return do_ppzz_flags(s, a, fns[a->esz]); \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
2978
38cadeba
RH
/*
 *** SVE Integer Compare - Immediate Groups
 */

/*
 * Common expansion for predicated vector-immediate compares.  The
 * immediate travels in the descriptor DATA field; the helper writes
 * the predicate result and returns the NZCV flags.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    /* Commit the flags result from the helper. */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
{ \
    static gen_helper_gvec_flags_3 * const fns[4] = { \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
    }; \
    return do_ppzi_flags(s, a, fns[a->esz]); \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
3041
35da316f
RH
/*
 *** SVE Partition Break Group
 */

/*
 * Expand a 3-operand BRK insn (BRKPA/BRKPB).  When a->s is set, use
 * the flag-setting helper variant and commit the returned NZCV flags.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, m, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, m, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    return true;
}

/*
 * Expand a 2-operand BRK insn (BRKA/BRKB/BRKN).  As above, a->s
 * selects the flag-setting helper.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    return true;
}
3114
/* BRKPA, with the flag-setting helper selected when a->s is set. */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}
3119
/* BRKPB, with the flag-setting helper selected when a->s is set. */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}
3124
/* BRKA (merging form), flag-setting helper when a->s is set. */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}
3129
/* BRKB (merging form), flag-setting helper when a->s is set. */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}
3134
/* BRKA (zeroing form), flag-setting helper when a->s is set. */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}
3139
/* BRKB (zeroing form), flag-setting helper when a->s is set. */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}
3144
/* BRKN, flag-setting helper when a->s is set. */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3149
/*
 *** SVE Predicate Count Group
 */

/*
 * Set 'val' to the number of active elements of size 1 << esz in
 * predicate Pn, governed by Pg.  Predicates of at most 8 bytes are
 * counted inline with a masked popcount; larger predicates go through
 * the out-of-line helper.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));

        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}
3192
/* CNTP: write the active-element count of Pn under Pg into Xd. */
static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
{
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}
3200
3a7be554 3201static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3202{
3203 if (sve_access_check(s)) {
3204 TCGv_i64 reg = cpu_reg(s, a->rd);
3205 TCGv_i64 val = tcg_temp_new_i64();
3206
3207 do_cntp(s, val, a->esz, a->pg, a->pg);
3208 if (a->d) {
3209 tcg_gen_sub_i64(reg, reg, val);
3210 } else {
3211 tcg_gen_add_i64(reg, reg, val);
3212 }
3213 tcg_temp_free_i64(val);
3214 }
3215 return true;
3216}
3217
/*
 * INCP/DECP (vector): add or subtract the active-element count of Pg
 * to every element of Zd.  Byte elements (esz == 0) are invalid.
 */
static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
    }
    return true;
}
3234
/* SQINCP/UQINCP/SQDECP/UQDECP (32-bit scalar): saturating adjust of Wd. */
static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}
3246
/* SQINCP/UQINCP/SQDECP/UQDECP (64-bit scalar): saturating adjust of Xd. */
static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}
3258
/*
 * SQINCP/UQINCP/SQDECP/UQDECP (vector): saturating adjust of each element
 * of Zd by the active-element count of Pg.  Byte elements are invalid.
 */
static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}
3271
/*
 *** SVE Integer Compare Scalars Group
 */

/*
 * CTERMEQ/CTERMNE: compare Rn with Rm and deposit the result in NF,
 * with VF derived from NF and the existing CF.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3300
/*
 * WHILE{LT,LE,HI,HS,GT,GE,LO,LS}: construct a predicate whose first N
 * elements are true, where N is computed from comparing scalars Rn/Rm.
 * The condition is compressed to an iteration count for the helper.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit operands: extend according to signedness. */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length.  This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
3411
14f6dad1
RH
3412static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3413{
3414 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3415 TCGv_i32 t2;
14f6dad1
RH
3416 TCGv_ptr ptr;
3417 unsigned vsz = vec_full_reg_size(s);
3418 unsigned desc = 0;
3419
3420 if (!dc_isar_feature(aa64_sve2, s)) {
3421 return false;
3422 }
3423 if (!sve_access_check(s)) {
3424 return true;
3425 }
3426
3427 op0 = read_cpu_reg(s, a->rn, 1);
3428 op1 = read_cpu_reg(s, a->rm, 1);
3429
4481bbf2 3430 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3431 diff = tcg_temp_new_i64();
3432
3433 if (a->rw) {
3434 /* WHILERW */
3435 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3436 t1 = tcg_temp_new_i64();
3437 tcg_gen_sub_i64(diff, op0, op1);
3438 tcg_gen_sub_i64(t1, op1, op0);
3439 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3440 tcg_temp_free_i64(t1);
3441 /* Round down to a multiple of ESIZE. */
3442 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3443 /* If op1 == op0, diff == 0, and the condition is always true. */
3444 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3445 } else {
3446 /* WHILEWR */
3447 tcg_gen_sub_i64(diff, op1, op0);
3448 /* Round down to a multiple of ESIZE. */
3449 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3450 /* If op0 >= op1, diff <= 0, the condition is always true. */
3451 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3452 }
3453
3454 /* Bound to the maximum. */
3455 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3456
3457 /* Since we're bounded, pass as a 32-bit type. */
3458 t2 = tcg_temp_new_i32();
3459 tcg_gen_extrl_i64_i32(t2, diff);
3460 tcg_temp_free_i64(diff);
3461
3462 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3463 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3464
3465 ptr = tcg_temp_new_ptr();
3466 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3467
4481bbf2 3468 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3469 do_pred_flags(t2);
3470
3471 tcg_temp_free_ptr(ptr);
3472 tcg_temp_free_i32(t2);
14f6dad1
RH
3473 return true;
3474}
3475
/*
 *** SVE Integer Wide Immediate - Unpredicated Group
 */

/* FDUP: broadcast a VFP-encoded immediate to every element of Zd. */
static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate. */
        imm = vfp_expand_imm(a->esz, a->imm);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
    }
    return true;
}
3496
/*
 * DUP (immediate): broadcast an integer immediate to Zd.
 * The shifted form (insn bit 13) is invalid for byte elements.
 */
static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);

        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
    }
    return true;
}
3510
/*
 * ADD (immediate): Zd = Zn + imm.  The shifted immediate form (insn
 * bit 13) is invalid for byte elements.
 */
static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    return gen_gvec_fn_arg_zzi(s, tcg_gen_gvec_addi, a);
}
3518
/* SUB (immediate): implemented as an add of the negated immediate. */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3524
/*
 * SUBR (immediate): reversed subtract, Zd = imm - Zn.  The scalar_first
 * flag makes the immediate the first (minuend) operand of the subtract.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* The shifted immediate form is invalid for byte elements. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}
3567
3a7be554 3568static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3569{
3570 if (sve_access_check(s)) {
3571 unsigned vsz = vec_full_reg_size(s);
3572 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3573 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3574 }
3575 return true;
3576}
3577
/*
 * Common expander for saturating add/subtract of an immediate:
 * @u selects unsigned saturation, @d selects subtraction.
 */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    /* The shifted immediate form is invalid for byte elements. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                          tcg_constant_i64(a->imm), u, d);
    }
    return true;
}
3589
/* SQADD (immediate): signed saturating add. */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}
3594
/* UQADD (immediate): unsigned saturating add. */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}
3599
/* SQSUB (immediate): signed saturating subtract. */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}
3604
/* UQSUB (immediate): unsigned saturating subtract. */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
3609
/* Expand a (vector, wide-immediate) operation via an out-of-line helper. */
static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
    }
    return true;
}
3620
/* Min/max against a wide immediate, per element size. */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3637
/* DOT (vectors): table indexed by [a->u (unsigned)][a->sz (element size)]. */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
d730ecaa 3644
814d4c52
RH
/*
 * SVE Multiply - Indexed
 */

/* Indexed dot products; the _d forms use the halfword-element helpers. */
TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

/* Mixed-sign dot products are part of the I8MM extension. */
TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)
16fcfdc7 3662
/* Two-source indexed multiplies; a->index selects the Zm element. */
#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX
3680
/*
 * Indexed long multiplies; TOP selects top/bottom halves and is packed
 * into bit 0 of the descriptor data alongside the index.
 */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
3701
/* Three-source indexed multiply-accumulate. */
#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
3722
/*
 * Indexed long multiply-accumulate; TOP selects top/bottom halves,
 * packed into bit 0 of the descriptor data alongside the index.
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
3758
/*
 * Indexed complex multiply-accumulate; the rotation is packed into the
 * low two bits of the descriptor data alongside the index.
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
3773
/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */

/*
 * Expand FMLA/FMLS (indexed); @sub selects subtraction and is packed
 * into bit 0 of the descriptor data alongside the index.
 */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses the FZ16-aware fp status. */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3799
0a82d963
RH
/* FMLA (indexed). */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, false);
}
3804
/* FMLS (indexed). */
static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, true);
}
3809
/*
 *** SVE Floating Point Multiply Indexed Group
 */

/* FMUL (indexed): multiply Zn by the selected element of Zm. */
static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses the FZ16-aware fp status. */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3833
/*
 *** SVE Floating Point Fast Reduction Group
 */

typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/*
 * Reduce Zn under Pg into scalar Vd via @fn.  The descriptor's data
 * field carries the next power of two above the vector size, which the
 * helper uses for its reduction tree.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
3866
/* FP reductions; byte elements are invalid for all of them. */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)       \
{                                                               \
    static gen_helper_fp_reduce * const fns[3] = {              \
        gen_helper_sve_##name##_h,                              \
        gen_helper_sve_##name##_s,                              \
        gen_helper_sve_##name##_d,                              \
    };                                                          \
    if (a->esz == 0) {                                          \
        return false;                                           \
    }                                                           \
    if (sve_access_check(s)) {                                  \
        do_reduce(s, a, fns[a->esz - 1]);                       \
    }                                                           \
    return true;                                                \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
3889
/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */

/* Expand an unpredicated FP unary op Zd = fn(Zn). */
static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}
3904
/* FRECPE: reciprocal estimate; byte elements are invalid. */
static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frecpe_h,
        gen_helper_gvec_frecpe_s,
        gen_helper_gvec_frecpe_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}
3920
/* FRSQRTE: reciprocal square root estimate; byte elements are invalid. */
static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frsqrte_h,
        gen_helper_gvec_frsqrte_s,
        gen_helper_gvec_frsqrte_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}
3936
/*
 *** SVE Floating Point Compare with Zero Group
 */

/* Expand a predicated FP compare-with-zero, writing a predicate result. */
static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}
3953
/* FP compare-with-zero expanders; byte elements are invalid. */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)       \
{                                                               \
    static gen_helper_gvec_3_ptr * const fns[3] = {             \
        gen_helper_sve_##name##_h,                              \
        gen_helper_sve_##name##_s,                               \
        gen_helper_sve_##name##_d,                              \
    };                                                          \
    if (a->esz == 0) {                                          \
        return false;                                           \
    }                                                           \
    if (sve_access_check(s)) {                                  \
        do_ppz_fp(s, a, fns[a->esz - 1]);                       \
    }                                                           \
    return true;                                                \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3979
/*
 *** SVE floating-point trig multiply-add coefficient
 */

/* FTMAD: the 3-bit immediate selects the coefficient; bytes are invalid. */
static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_ftmad_h,
        gen_helper_sve_ftmad_s,
        gen_helper_sve_ftmad_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4006
/*
 *** SVE Floating Point Accumulating Reduction Group
 */

/*
 * FADDA: ordered accumulation of the active elements of Zm into the
 * scalar held in Vn, written back to Vd.  Byte elements are invalid.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
4050
/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */

/* Expand an unpredicated FP binary op; NULL @fn marks invalid esz. */
static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4072
4073
4074#define DO_FP3(NAME, name) \
3a7be554 4075static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
4076{ \
4077 static gen_helper_gvec_3_ptr * const fns[4] = { \
4078 NULL, gen_helper_gvec_##name##_h, \
4079 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
4080 }; \
4081 return do_zzz_fp(s, a, fns[a->esz]); \
4082}
4083
4084DO_FP3(FADD_zzz, fadd)
4085DO_FP3(FSUB_zzz, fsub)
4086DO_FP3(FMUL_zzz, fmul)
4087DO_FP3(FTSMUL, ftsmul)
4088DO_FP3(FRECPS, recps)
4089DO_FP3(FRSQRTS, rsqrts)
4090
4091#undef DO_FP3
4092
ec3b87c2
RH
4093/*
4094 *** SVE Floating Point Arithmetic - Predicated Group
4095 */
4096
4097static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
4098 gen_helper_gvec_4_ptr *fn)
4099{
4100 if (fn == NULL) {
4101 return false;
4102 }
4103 if (sve_access_check(s)) {
4104 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4105 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
4106 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4107 vec_full_reg_offset(s, a->rn),
4108 vec_full_reg_offset(s, a->rm),
4109 pred_full_reg_offset(s, a->pg),
4110 status, vsz, vsz, 0, fn);
4111 tcg_temp_free_ptr(status);
4112 }
4113 return true;
4114}
4115
4116#define DO_FP3(NAME, name) \
3a7be554 4117static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
4118{ \
4119 static gen_helper_gvec_4_ptr * const fns[4] = { \
4120 NULL, gen_helper_sve_##name##_h, \
4121 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4122 }; \
4123 return do_zpzz_fp(s, a, fns[a->esz]); \
4124}
4125
4126DO_FP3(FADD_zpzz, fadd)
4127DO_FP3(FSUB_zpzz, fsub)
4128DO_FP3(FMUL_zpzz, fmul)
4129DO_FP3(FMIN_zpzz, fmin)
4130DO_FP3(FMAX_zpzz, fmax)
4131DO_FP3(FMINNM_zpzz, fminnum)
4132DO_FP3(FMAXNM_zpzz, fmaxnum)
4133DO_FP3(FABD, fabd)
4134DO_FP3(FSCALE, fscalbn)
4135DO_FP3(FDIV, fdiv)
4136DO_FP3(FMULX, fmulx)
4137
4138#undef DO_FP3
8092c6a3 4139
cc48affe
RH
4140typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
4141 TCGv_i64, TCGv_ptr, TCGv_i32);
4142
4143static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
4144 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
4145{
4146 unsigned vsz = vec_full_reg_size(s);
4147 TCGv_ptr t_zd, t_zn, t_pg, status;
4148 TCGv_i32 desc;
4149
4150 t_zd = tcg_temp_new_ptr();
4151 t_zn = tcg_temp_new_ptr();
4152 t_pg = tcg_temp_new_ptr();
4153 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
4154 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
4155 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4156
cdfb22bb 4157 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
c6a59b55 4158 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
cc48affe
RH
4159 fn(t_zd, t_zn, t_pg, scalar, status, desc);
4160
cc48affe
RH
4161 tcg_temp_free_ptr(status);
4162 tcg_temp_free_ptr(t_pg);
4163 tcg_temp_free_ptr(t_zn);
4164 tcg_temp_free_ptr(t_zd);
4165}
4166
4167static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4168 gen_helper_sve_fp2scalar *fn)
4169{
138a1f7b
RH
4170 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4171 tcg_constant_i64(imm), fn);
cc48affe
RH
4172}
4173
4174#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 4175static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
4176{ \
4177 static gen_helper_sve_fp2scalar * const fns[3] = { \
4178 gen_helper_sve_##name##_h, \
4179 gen_helper_sve_##name##_s, \
4180 gen_helper_sve_##name##_d \
4181 }; \
4182 static uint64_t const val[3][2] = { \
4183 { float16_##const0, float16_##const1 }, \
4184 { float32_##const0, float32_##const1 }, \
4185 { float64_##const0, float64_##const1 }, \
4186 }; \
4187 if (a->esz == 0) { \
4188 return false; \
4189 } \
4190 if (sve_access_check(s)) { \
4191 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
4192 } \
4193 return true; \
4194}
4195
cc48affe
RH
4196DO_FP_IMM(FADD, fadds, half, one)
4197DO_FP_IMM(FSUB, fsubs, half, one)
4198DO_FP_IMM(FMUL, fmuls, half, two)
4199DO_FP_IMM(FSUBR, fsubrs, half, one)
4200DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
4201DO_FP_IMM(FMINNM, fminnms, zero, one)
4202DO_FP_IMM(FMAX, fmaxs, zero, one)
4203DO_FP_IMM(FMIN, fmins, zero, one)
4204
4205#undef DO_FP_IMM
4206
abfdefd5
RH
4207static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4208 gen_helper_gvec_4_ptr *fn)
4209{
4210 if (fn == NULL) {
4211 return false;
4212 }
4213 if (sve_access_check(s)) {
4214 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4215 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4216 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4217 vec_full_reg_offset(s, a->rn),
4218 vec_full_reg_offset(s, a->rm),
4219 pred_full_reg_offset(s, a->pg),
4220 status, vsz, vsz, 0, fn);
4221 tcg_temp_free_ptr(status);
4222 }
4223 return true;
4224}
4225
4226#define DO_FPCMP(NAME, name) \
3a7be554 4227static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
4228{ \
4229 static gen_helper_gvec_4_ptr * const fns[4] = { \
4230 NULL, gen_helper_sve_##name##_h, \
4231 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4232 }; \
4233 return do_fp_cmp(s, a, fns[a->esz]); \
4234}
4235
4236DO_FPCMP(FCMGE, fcmge)
4237DO_FPCMP(FCMGT, fcmgt)
4238DO_FPCMP(FCMEQ, fcmeq)
4239DO_FPCMP(FCMNE, fcmne)
4240DO_FPCMP(FCMUO, fcmuo)
4241DO_FPCMP(FACGE, facge)
4242DO_FPCMP(FACGT, facgt)
4243
4244#undef DO_FPCMP
4245
3a7be554 4246static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
4247{
4248 static gen_helper_gvec_4_ptr * const fns[3] = {
4249 gen_helper_sve_fcadd_h,
4250 gen_helper_sve_fcadd_s,
4251 gen_helper_sve_fcadd_d
4252 };
4253
4254 if (a->esz == 0) {
4255 return false;
4256 }
4257 if (sve_access_check(s)) {
4258 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4259 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
4260 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4261 vec_full_reg_offset(s, a->rn),
4262 vec_full_reg_offset(s, a->rm),
4263 pred_full_reg_offset(s, a->pg),
4264 status, vsz, vsz, a->rot, fns[a->esz - 1]);
4265 tcg_temp_free_ptr(status);
4266 }
4267 return true;
4268}
4269
08975da9
RH
4270static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
4271 gen_helper_gvec_5_ptr *fn)
6ceabaad 4272{
08975da9 4273 if (a->esz == 0) {
6ceabaad
RH
4274 return false;
4275 }
08975da9
RH
4276 if (sve_access_check(s)) {
4277 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4278 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4279 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4280 vec_full_reg_offset(s, a->rn),
4281 vec_full_reg_offset(s, a->rm),
4282 vec_full_reg_offset(s, a->ra),
4283 pred_full_reg_offset(s, a->pg),
4284 status, vsz, vsz, 0, fn);
4285 tcg_temp_free_ptr(status);
6ceabaad 4286 }
6ceabaad
RH
4287 return true;
4288}
4289
4290#define DO_FMLA(NAME, name) \
3a7be554 4291static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 4292{ \
08975da9 4293 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
4294 NULL, gen_helper_sve_##name##_h, \
4295 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4296 }; \
4297 return do_fmla(s, a, fns[a->esz]); \
4298}
4299
4300DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4301DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4302DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4303DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4304
4305#undef DO_FMLA
4306
3a7be554 4307static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 4308{
08975da9
RH
4309 static gen_helper_gvec_5_ptr * const fns[4] = {
4310 NULL,
05f48bab
RH
4311 gen_helper_sve_fcmla_zpzzz_h,
4312 gen_helper_sve_fcmla_zpzzz_s,
4313 gen_helper_sve_fcmla_zpzzz_d,
4314 };
4315
4316 if (a->esz == 0) {
4317 return false;
4318 }
4319 if (sve_access_check(s)) {
4320 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4321 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4322 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4323 vec_full_reg_offset(s, a->rn),
4324 vec_full_reg_offset(s, a->rm),
4325 vec_full_reg_offset(s, a->ra),
4326 pred_full_reg_offset(s, a->pg),
4327 status, vsz, vsz, a->rot, fns[a->esz]);
4328 tcg_temp_free_ptr(status);
05f48bab
RH
4329 }
4330 return true;
4331}
4332
3a7be554 4333static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405 4334{
636ddeb1 4335 static gen_helper_gvec_4_ptr * const fns[2] = {
18fc2405
RH
4336 gen_helper_gvec_fcmlah_idx,
4337 gen_helper_gvec_fcmlas_idx,
4338 };
4339
4340 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4341 tcg_debug_assert(a->rd == a->ra);
4342 if (sve_access_check(s)) {
4343 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4344 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
636ddeb1 4345 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
18fc2405
RH
4346 vec_full_reg_offset(s, a->rn),
4347 vec_full_reg_offset(s, a->rm),
636ddeb1 4348 vec_full_reg_offset(s, a->ra),
18fc2405
RH
4349 status, vsz, vsz,
4350 a->index * 4 + a->rot,
4351 fns[a->esz - 1]);
4352 tcg_temp_free_ptr(status);
4353 }
4354 return true;
4355}
4356
8092c6a3
RH
4357/*
4358 *** SVE Floating Point Unary Operations Predicated Group
4359 */
4360
4361static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4362 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4363{
4364 if (sve_access_check(s)) {
4365 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4366 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
4367 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4368 vec_full_reg_offset(s, rn),
4369 pred_full_reg_offset(s, pg),
4370 status, vsz, vsz, 0, fn);
4371 tcg_temp_free_ptr(status);
4372 }
4373 return true;
4374}
4375
3a7be554 4376static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4377{
e4ab5124 4378 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
4379}
4380
3a7be554 4381static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4382{
4383 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4384}
4385
d29b17ca
RH
4386static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
4387{
4388 if (!dc_isar_feature(aa64_sve_bf16, s)) {
4389 return false;
4390 }
4391 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
4392}
4393
3a7be554 4394static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4395{
e4ab5124 4396 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
4397}
4398
3a7be554 4399static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4400{
4401 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4402}
4403
3a7be554 4404static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4405{
4406 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4407}
4408
3a7be554 4409static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4410{
4411 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4412}
4413
3a7be554 4414static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4415{
4416 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4417}
4418
3a7be554 4419static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4420{
4421 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4422}
4423
3a7be554 4424static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4425{
4426 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4427}
4428
3a7be554 4429static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4430{
4431 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4432}
4433
3a7be554 4434static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4435{
4436 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4437}
4438
3a7be554 4439static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4440{
4441 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4442}
4443
3a7be554 4444static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4445{
4446 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4447}
4448
3a7be554 4449static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4450{
4451 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4452}
4453
3a7be554 4454static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4455{
4456 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4457}
4458
3a7be554 4459static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4460{
4461 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4462}
4463
3a7be554 4464static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4465{
4466 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4467}
4468
3a7be554 4469static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4470{
4471 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4472}
4473
3a7be554 4474static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4475{
4476 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4477}
4478
3a7be554 4479static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4480{
4481 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4482}
4483
cda3c753
RH
4484static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4485 gen_helper_sve_frint_h,
4486 gen_helper_sve_frint_s,
4487 gen_helper_sve_frint_d
4488};
4489
3a7be554 4490static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4491{
4492 if (a->esz == 0) {
4493 return false;
4494 }
4495 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4496 frint_fns[a->esz - 1]);
4497}
4498
3a7be554 4499static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4500{
4501 static gen_helper_gvec_3_ptr * const fns[3] = {
4502 gen_helper_sve_frintx_h,
4503 gen_helper_sve_frintx_s,
4504 gen_helper_sve_frintx_d
4505 };
4506 if (a->esz == 0) {
4507 return false;
4508 }
4509 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4510}
4511
95365277
SL
4512static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4513 int mode, gen_helper_gvec_3_ptr *fn)
cda3c753 4514{
cda3c753
RH
4515 if (sve_access_check(s)) {
4516 unsigned vsz = vec_full_reg_size(s);
4517 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4518 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4519
4520 gen_helper_set_rmode(tmode, tmode, status);
4521
4522 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4523 vec_full_reg_offset(s, a->rn),
4524 pred_full_reg_offset(s, a->pg),
95365277 4525 status, vsz, vsz, 0, fn);
cda3c753
RH
4526
4527 gen_helper_set_rmode(tmode, tmode, status);
4528 tcg_temp_free_i32(tmode);
4529 tcg_temp_free_ptr(status);
4530 }
4531 return true;
4532}
4533
3a7be554 4534static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753 4535{
95365277
SL
4536 if (a->esz == 0) {
4537 return false;
4538 }
4539 return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
cda3c753
RH
4540}
4541
3a7be554 4542static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753 4543{
95365277
SL
4544 if (a->esz == 0) {
4545 return false;
4546 }
4547 return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
cda3c753
RH
4548}
4549
3a7be554 4550static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753 4551{
95365277
SL
4552 if (a->esz == 0) {
4553 return false;
4554 }
4555 return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
cda3c753
RH
4556}
4557
3a7be554 4558static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753 4559{
95365277
SL
4560 if (a->esz == 0) {
4561 return false;
4562 }
4563 return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
cda3c753
RH
4564}
4565
3a7be554 4566static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753 4567{
95365277
SL
4568 if (a->esz == 0) {
4569 return false;
4570 }
4571 return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
cda3c753
RH
4572}
4573
3a7be554 4574static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4575{
4576 static gen_helper_gvec_3_ptr * const fns[3] = {
4577 gen_helper_sve_frecpx_h,
4578 gen_helper_sve_frecpx_s,
4579 gen_helper_sve_frecpx_d
4580 };
4581 if (a->esz == 0) {
4582 return false;
4583 }
4584 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4585}
4586
3a7be554 4587static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4588{
4589 static gen_helper_gvec_3_ptr * const fns[3] = {
4590 gen_helper_sve_fsqrt_h,
4591 gen_helper_sve_fsqrt_s,
4592 gen_helper_sve_fsqrt_d
4593 };
4594 if (a->esz == 0) {
4595 return false;
4596 }
4597 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4598}
4599
3a7be554 4600static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4601{
4602 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4603}
4604
3a7be554 4605static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4606{
4607 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4608}
4609
3a7be554 4610static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4611{
4612 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4613}
4614
3a7be554 4615static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4616{
4617 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4618}
4619
3a7be554 4620static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4621{
4622 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4623}
4624
3a7be554 4625static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4626{
4627 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4628}
4629
3a7be554 4630static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4631{
4632 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4633}
4634
3a7be554 4635static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4636{
4637 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4638}
4639
3a7be554 4640static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4641{
4642 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4643}
4644
3a7be554 4645static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4646{
4647 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4648}
4649
3a7be554 4650static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4651{
4652 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4653}
4654
3a7be554 4655static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4656{
4657 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4658}
4659
3a7be554 4660static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4661{
4662 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4663}
4664
3a7be554 4665static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4666{
4667 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4668}
4669
d1822297
RH
4670/*
4671 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4672 */
4673
4674/* Subroutine loading a vector register at VOFS of LEN bytes.
4675 * The load should begin at the address Rn + IMM.
4676 */
4677
19f2acc9 4678static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4679{
19f2acc9
RH
4680 int len_align = QEMU_ALIGN_DOWN(len, 8);
4681 int len_remain = len % 8;
4682 int nparts = len / 8 + ctpop8(len_remain);
d1822297 4683 int midx = get_mem_index(s);
b2aa8879 4684 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4685
b2aa8879
RH
4686 dirty_addr = tcg_temp_new_i64();
4687 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4688 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
b2aa8879 4689 tcg_temp_free_i64(dirty_addr);
d1822297 4690
b2aa8879
RH
4691 /*
4692 * Note that unpredicated load/store of vector/predicate registers
d1822297 4693 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4694 * operations on larger quantities.
d1822297
RH
4695 * Attempt to keep code expansion to a minimum by limiting the
4696 * amount of unrolling done.
4697 */
4698 if (nparts <= 4) {
4699 int i;
4700
b2aa8879 4701 t0 = tcg_temp_new_i64();
d1822297 4702 for (i = 0; i < len_align; i += 8) {
fc313c64 4703 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
d1822297 4704 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 4705 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4706 }
b2aa8879 4707 tcg_temp_free_i64(t0);
d1822297
RH
4708 } else {
4709 TCGLabel *loop = gen_new_label();
4710 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4711
b2aa8879
RH
4712 /* Copy the clean address into a local temp, live across the loop. */
4713 t0 = clean_addr;
4b4dc975 4714 clean_addr = new_tmp_a64_local(s);
b2aa8879 4715 tcg_gen_mov_i64(clean_addr, t0);
d1822297 4716
b2aa8879 4717 gen_set_label(loop);
d1822297 4718
b2aa8879 4719 t0 = tcg_temp_new_i64();
fc313c64 4720 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
b2aa8879 4721 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4722
b2aa8879 4723 tp = tcg_temp_new_ptr();
d1822297
RH
4724 tcg_gen_add_ptr(tp, cpu_env, i);
4725 tcg_gen_addi_ptr(i, i, 8);
4726 tcg_gen_st_i64(t0, tp, vofs);
4727 tcg_temp_free_ptr(tp);
b2aa8879 4728 tcg_temp_free_i64(t0);
d1822297
RH
4729
4730 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4731 tcg_temp_free_ptr(i);
4732 }
4733
b2aa8879
RH
4734 /*
4735 * Predicate register loads can be any multiple of 2.
d1822297
RH
4736 * Note that we still store the entire 64-bit unit into cpu_env.
4737 */
4738 if (len_remain) {
b2aa8879 4739 t0 = tcg_temp_new_i64();
d1822297
RH
4740 switch (len_remain) {
4741 case 2:
4742 case 4:
4743 case 8:
b2aa8879
RH
4744 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4745 MO_LE | ctz32(len_remain));
d1822297
RH
4746 break;
4747
4748 case 6:
4749 t1 = tcg_temp_new_i64();
b2aa8879
RH
4750 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4751 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4752 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
4753 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4754 tcg_temp_free_i64(t1);
4755 break;
4756
4757 default:
4758 g_assert_not_reached();
4759 }
4760 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 4761 tcg_temp_free_i64(t0);
d1822297 4762 }
d1822297
RH
4763}
4764
5047c204 4765/* Similarly for stores. */
19f2acc9 4766static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 4767{
19f2acc9
RH
4768 int len_align = QEMU_ALIGN_DOWN(len, 8);
4769 int len_remain = len % 8;
4770 int nparts = len / 8 + ctpop8(len_remain);
5047c204 4771 int midx = get_mem_index(s);
bba87d0a 4772 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 4773
bba87d0a
RH
4774 dirty_addr = tcg_temp_new_i64();
4775 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4776 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
bba87d0a 4777 tcg_temp_free_i64(dirty_addr);
5047c204
RH
4778
4779 /* Note that unpredicated load/store of vector/predicate registers
4780 * are defined as a stream of bytes, which equates to little-endian
4781 * operations on larger quantities. There is no nice way to force
4782 * a little-endian store for aarch64_be-linux-user out of line.
4783 *
4784 * Attempt to keep code expansion to a minimum by limiting the
4785 * amount of unrolling done.
4786 */
4787 if (nparts <= 4) {
4788 int i;
4789
bba87d0a 4790 t0 = tcg_temp_new_i64();
5047c204
RH
4791 for (i = 0; i < len_align; i += 8) {
4792 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
fc313c64 4793 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
d8227b09 4794 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 4795 }
bba87d0a 4796 tcg_temp_free_i64(t0);
5047c204
RH
4797 } else {
4798 TCGLabel *loop = gen_new_label();
bba87d0a 4799 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 4800
bba87d0a
RH
4801 /* Copy the clean address into a local temp, live across the loop. */
4802 t0 = clean_addr;
4b4dc975 4803 clean_addr = new_tmp_a64_local(s);
bba87d0a 4804 tcg_gen_mov_i64(clean_addr, t0);
5047c204 4805
bba87d0a 4806 gen_set_label(loop);
5047c204 4807
bba87d0a
RH
4808 t0 = tcg_temp_new_i64();
4809 tp = tcg_temp_new_ptr();
4810 tcg_gen_add_ptr(tp, cpu_env, i);
4811 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 4812 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
4813 tcg_temp_free_ptr(tp);
4814
fc313c64 4815 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
bba87d0a
RH
4816 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4817 tcg_temp_free_i64(t0);
5047c204
RH
4818
4819 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4820 tcg_temp_free_ptr(i);
4821 }
4822
4823 /* Predicate register stores can be any multiple of 2. */
4824 if (len_remain) {
bba87d0a 4825 t0 = tcg_temp_new_i64();
5047c204 4826 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
4827
4828 switch (len_remain) {
4829 case 2:
4830 case 4:
4831 case 8:
bba87d0a
RH
4832 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4833 MO_LE | ctz32(len_remain));
5047c204
RH
4834 break;
4835
4836 case 6:
bba87d0a
RH
4837 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4838 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 4839 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 4840 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
4841 break;
4842
4843 default:
4844 g_assert_not_reached();
4845 }
bba87d0a 4846 tcg_temp_free_i64(t0);
5047c204 4847 }
5047c204
RH
4848}
4849
3a7be554 4850static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4851{
4852 if (sve_access_check(s)) {
4853 int size = vec_full_reg_size(s);
4854 int off = vec_full_reg_offset(s, a->rd);
4855 do_ldr(s, off, size, a->rn, a->imm * size);
4856 }
4857 return true;
4858}
4859
3a7be554 4860static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4861{
4862 if (sve_access_check(s)) {
4863 int size = pred_full_reg_size(s);
4864 int off = pred_full_reg_offset(s, a->rd);
4865 do_ldr(s, off, size, a->rn, a->imm * size);
4866 }
4867 return true;
4868}
c4e7c493 4869
3a7be554 4870static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4871{
4872 if (sve_access_check(s)) {
4873 int size = vec_full_reg_size(s);
4874 int off = vec_full_reg_offset(s, a->rd);
4875 do_str(s, off, size, a->rn, a->imm * size);
4876 }
4877 return true;
4878}
4879
3a7be554 4880static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4881{
4882 if (sve_access_check(s)) {
4883 int size = pred_full_reg_size(s);
4884 int off = pred_full_reg_offset(s, a->rd);
4885 do_str(s, off, size, a->rn, a->imm * size);
4886 }
4887 return true;
4888}
4889
c4e7c493
RH
4890/*
4891 *** SVE Memory - Contiguous Load Group
4892 */
4893
4894/* The memory mode of the dtype. */
14776ab5 4895static const MemOp dtype_mop[16] = {
c4e7c493
RH
4896 MO_UB, MO_UB, MO_UB, MO_UB,
4897 MO_SL, MO_UW, MO_UW, MO_UW,
4898 MO_SW, MO_SW, MO_UL, MO_UL,
fc313c64 4899 MO_SB, MO_SB, MO_SB, MO_UQ
c4e7c493
RH
4900};
4901
4902#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4903
4904/* The vector element size of dtype. */
4905static const uint8_t dtype_esz[16] = {
4906 0, 1, 2, 3,
4907 3, 1, 2, 3,
4908 3, 2, 2, 3,
4909 3, 2, 1, 3
4910};
4911
4912static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
4913 int dtype, uint32_t mte_n, bool is_write,
4914 gen_helper_gvec_mem *fn)
c4e7c493
RH
4915{
4916 unsigned vsz = vec_full_reg_size(s);
4917 TCGv_ptr t_pg;
206adacf 4918 int desc = 0;
c4e7c493 4919
206adacf
RH
4920 /*
4921 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
4922 * registers as pointers, so encode the regno into the data field.
4923 * For consistency, do this even for LD1.
4924 */
9473d0ec 4925 if (s->mte_active[0]) {
206adacf
RH
4926 int msz = dtype_msz(dtype);
4927
4928 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4929 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4930 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4931 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 4932 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
206adacf 4933 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
4934 } else {
4935 addr = clean_data_tbi(s, addr);
206adacf 4936 }
9473d0ec 4937
206adacf 4938 desc = simd_desc(vsz, vsz, zt | desc);
c4e7c493
RH
4939 t_pg = tcg_temp_new_ptr();
4940
4941 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
c6a59b55 4942 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));
c4e7c493
RH
4943
4944 tcg_temp_free_ptr(t_pg);
c4e7c493
RH
4945}
4946
c182c6db
RH
4947/* Indexed by [mte][be][dtype][nreg] */
4948static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
4949 { /* mte inactive, little-endian */
4950 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4951 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4952 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4953 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4954 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4955
4956 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4957 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4958 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4959 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4960 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4961
4962 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4963 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4964 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4965 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4966 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4967
4968 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4969 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4970 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4971 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4972 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4973
4974 /* mte inactive, big-endian */
4975 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4976 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4977 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4978 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4979 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4980
4981 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4982 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4983 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4984 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4985 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4986
4987 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4988 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4989 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4990 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4991 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4992
4993 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4994 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4995 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4996 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4997 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4998
4999 { /* mte active, little-endian */
5000 { { gen_helper_sve_ld1bb_r_mte,
5001 gen_helper_sve_ld2bb_r_mte,
5002 gen_helper_sve_ld3bb_r_mte,
5003 gen_helper_sve_ld4bb_r_mte },
5004 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
5005 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
5006 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
5007
5008 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
5009 { gen_helper_sve_ld1hh_le_r_mte,
5010 gen_helper_sve_ld2hh_le_r_mte,
5011 gen_helper_sve_ld3hh_le_r_mte,
5012 gen_helper_sve_ld4hh_le_r_mte },
5013 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
5014 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
5015
5016 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
5017 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
5018 { gen_helper_sve_ld1ss_le_r_mte,
5019 gen_helper_sve_ld2ss_le_r_mte,
5020 gen_helper_sve_ld3ss_le_r_mte,
5021 gen_helper_sve_ld4ss_le_r_mte },
5022 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
5023
5024 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
5025 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
5026 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
5027 { gen_helper_sve_ld1dd_le_r_mte,
5028 gen_helper_sve_ld2dd_le_r_mte,
5029 gen_helper_sve_ld3dd_le_r_mte,
5030 gen_helper_sve_ld4dd_le_r_mte } },
5031
5032 /* mte active, big-endian */
5033 { { gen_helper_sve_ld1bb_r_mte,
5034 gen_helper_sve_ld2bb_r_mte,
5035 gen_helper_sve_ld3bb_r_mte,
5036 gen_helper_sve_ld4bb_r_mte },
5037 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
5038 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
5039 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
5040
5041 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
5042 { gen_helper_sve_ld1hh_be_r_mte,
5043 gen_helper_sve_ld2hh_be_r_mte,
5044 gen_helper_sve_ld3hh_be_r_mte,
5045 gen_helper_sve_ld4hh_be_r_mte },
5046 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
5047 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
5048
5049 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
5050 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
5051 { gen_helper_sve_ld1ss_be_r_mte,
5052 gen_helper_sve_ld2ss_be_r_mte,
5053 gen_helper_sve_ld3ss_be_r_mte,
5054 gen_helper_sve_ld4ss_be_r_mte },
5055 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
5056
5057 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
5058 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
5059 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
5060 { gen_helper_sve_ld1dd_be_r_mte,
5061 gen_helper_sve_ld2dd_be_r_mte,
5062 gen_helper_sve_ld3dd_be_r_mte,
5063 gen_helper_sve_ld4dd_be_r_mte } } },
5064};
5065
c4e7c493
RH
5066static void do_ld_zpa(DisasContext *s, int zt, int pg,
5067 TCGv_i64 addr, int dtype, int nreg)
5068{
206adacf 5069 gen_helper_gvec_mem *fn
c182c6db 5070 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 5071
206adacf
RH
5072 /*
5073 * While there are holes in the table, they are not
c4e7c493
RH
5074 * accessible via the instruction encoding.
5075 */
5076 assert(fn != NULL);
206adacf 5077 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
5078}
5079
3a7be554 5080static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
5081{
5082 if (a->rm == 31) {
5083 return false;
5084 }
5085 if (sve_access_check(s)) {
5086 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5087 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
5088 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5089 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5090 }
5091 return true;
5092}
5093
3a7be554 5094static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
5095{
5096 if (sve_access_check(s)) {
5097 int vsz = vec_full_reg_size(s);
5098 int elements = vsz >> dtype_esz[a->dtype];
5099 TCGv_i64 addr = new_tmp_a64(s);
5100
5101 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5102 (a->imm * elements * (a->nreg + 1))
5103 << dtype_msz(a->dtype));
5104 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5105 }
5106 return true;
5107}
e2654d75 5108
3a7be554 5109static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 5110{
aa13f7c3
RH
5111 static gen_helper_gvec_mem * const fns[2][2][16] = {
5112 { /* mte inactive, little-endian */
5113 { gen_helper_sve_ldff1bb_r,
5114 gen_helper_sve_ldff1bhu_r,
5115 gen_helper_sve_ldff1bsu_r,
5116 gen_helper_sve_ldff1bdu_r,
5117
5118 gen_helper_sve_ldff1sds_le_r,
5119 gen_helper_sve_ldff1hh_le_r,
5120 gen_helper_sve_ldff1hsu_le_r,
5121 gen_helper_sve_ldff1hdu_le_r,
5122
5123 gen_helper_sve_ldff1hds_le_r,
5124 gen_helper_sve_ldff1hss_le_r,
5125 gen_helper_sve_ldff1ss_le_r,
5126 gen_helper_sve_ldff1sdu_le_r,
5127
5128 gen_helper_sve_ldff1bds_r,
5129 gen_helper_sve_ldff1bss_r,
5130 gen_helper_sve_ldff1bhs_r,
5131 gen_helper_sve_ldff1dd_le_r },
5132
5133 /* mte inactive, big-endian */
5134 { gen_helper_sve_ldff1bb_r,
5135 gen_helper_sve_ldff1bhu_r,
5136 gen_helper_sve_ldff1bsu_r,
5137 gen_helper_sve_ldff1bdu_r,
5138
5139 gen_helper_sve_ldff1sds_be_r,
5140 gen_helper_sve_ldff1hh_be_r,
5141 gen_helper_sve_ldff1hsu_be_r,
5142 gen_helper_sve_ldff1hdu_be_r,
5143
5144 gen_helper_sve_ldff1hds_be_r,
5145 gen_helper_sve_ldff1hss_be_r,
5146 gen_helper_sve_ldff1ss_be_r,
5147 gen_helper_sve_ldff1sdu_be_r,
5148
5149 gen_helper_sve_ldff1bds_r,
5150 gen_helper_sve_ldff1bss_r,
5151 gen_helper_sve_ldff1bhs_r,
5152 gen_helper_sve_ldff1dd_be_r } },
5153
5154 { /* mte active, little-endian */
5155 { gen_helper_sve_ldff1bb_r_mte,
5156 gen_helper_sve_ldff1bhu_r_mte,
5157 gen_helper_sve_ldff1bsu_r_mte,
5158 gen_helper_sve_ldff1bdu_r_mte,
5159
5160 gen_helper_sve_ldff1sds_le_r_mte,
5161 gen_helper_sve_ldff1hh_le_r_mte,
5162 gen_helper_sve_ldff1hsu_le_r_mte,
5163 gen_helper_sve_ldff1hdu_le_r_mte,
5164
5165 gen_helper_sve_ldff1hds_le_r_mte,
5166 gen_helper_sve_ldff1hss_le_r_mte,
5167 gen_helper_sve_ldff1ss_le_r_mte,
5168 gen_helper_sve_ldff1sdu_le_r_mte,
5169
5170 gen_helper_sve_ldff1bds_r_mte,
5171 gen_helper_sve_ldff1bss_r_mte,
5172 gen_helper_sve_ldff1bhs_r_mte,
5173 gen_helper_sve_ldff1dd_le_r_mte },
5174
5175 /* mte active, big-endian */
5176 { gen_helper_sve_ldff1bb_r_mte,
5177 gen_helper_sve_ldff1bhu_r_mte,
5178 gen_helper_sve_ldff1bsu_r_mte,
5179 gen_helper_sve_ldff1bdu_r_mte,
5180
5181 gen_helper_sve_ldff1sds_be_r_mte,
5182 gen_helper_sve_ldff1hh_be_r_mte,
5183 gen_helper_sve_ldff1hsu_be_r_mte,
5184 gen_helper_sve_ldff1hdu_be_r_mte,
5185
5186 gen_helper_sve_ldff1hds_be_r_mte,
5187 gen_helper_sve_ldff1hss_be_r_mte,
5188 gen_helper_sve_ldff1ss_be_r_mte,
5189 gen_helper_sve_ldff1sdu_be_r_mte,
5190
5191 gen_helper_sve_ldff1bds_r_mte,
5192 gen_helper_sve_ldff1bss_r_mte,
5193 gen_helper_sve_ldff1bhs_r_mte,
5194 gen_helper_sve_ldff1dd_be_r_mte } },
e2654d75
RH
5195 };
5196
5197 if (sve_access_check(s)) {
5198 TCGv_i64 addr = new_tmp_a64(s);
5199 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5200 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
aa13f7c3
RH
5201 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5202 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
5203 }
5204 return true;
5205}
5206
3a7be554 5207static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 5208{
aa13f7c3
RH
5209 static gen_helper_gvec_mem * const fns[2][2][16] = {
5210 { /* mte inactive, little-endian */
5211 { gen_helper_sve_ldnf1bb_r,
5212 gen_helper_sve_ldnf1bhu_r,
5213 gen_helper_sve_ldnf1bsu_r,
5214 gen_helper_sve_ldnf1bdu_r,
5215
5216 gen_helper_sve_ldnf1sds_le_r,
5217 gen_helper_sve_ldnf1hh_le_r,
5218 gen_helper_sve_ldnf1hsu_le_r,
5219 gen_helper_sve_ldnf1hdu_le_r,
5220
5221 gen_helper_sve_ldnf1hds_le_r,
5222 gen_helper_sve_ldnf1hss_le_r,
5223 gen_helper_sve_ldnf1ss_le_r,
5224 gen_helper_sve_ldnf1sdu_le_r,
5225
5226 gen_helper_sve_ldnf1bds_r,
5227 gen_helper_sve_ldnf1bss_r,
5228 gen_helper_sve_ldnf1bhs_r,
5229 gen_helper_sve_ldnf1dd_le_r },
5230
5231 /* mte inactive, big-endian */
5232 { gen_helper_sve_ldnf1bb_r,
5233 gen_helper_sve_ldnf1bhu_r,
5234 gen_helper_sve_ldnf1bsu_r,
5235 gen_helper_sve_ldnf1bdu_r,
5236
5237 gen_helper_sve_ldnf1sds_be_r,
5238 gen_helper_sve_ldnf1hh_be_r,
5239 gen_helper_sve_ldnf1hsu_be_r,
5240 gen_helper_sve_ldnf1hdu_be_r,
5241
5242 gen_helper_sve_ldnf1hds_be_r,
5243 gen_helper_sve_ldnf1hss_be_r,
5244 gen_helper_sve_ldnf1ss_be_r,
5245 gen_helper_sve_ldnf1sdu_be_r,
5246
5247 gen_helper_sve_ldnf1bds_r,
5248 gen_helper_sve_ldnf1bss_r,
5249 gen_helper_sve_ldnf1bhs_r,
5250 gen_helper_sve_ldnf1dd_be_r } },
5251
5252 { /* mte inactive, little-endian */
5253 { gen_helper_sve_ldnf1bb_r_mte,
5254 gen_helper_sve_ldnf1bhu_r_mte,
5255 gen_helper_sve_ldnf1bsu_r_mte,
5256 gen_helper_sve_ldnf1bdu_r_mte,
5257
5258 gen_helper_sve_ldnf1sds_le_r_mte,
5259 gen_helper_sve_ldnf1hh_le_r_mte,
5260 gen_helper_sve_ldnf1hsu_le_r_mte,
5261 gen_helper_sve_ldnf1hdu_le_r_mte,
5262
5263 gen_helper_sve_ldnf1hds_le_r_mte,
5264 gen_helper_sve_ldnf1hss_le_r_mte,
5265 gen_helper_sve_ldnf1ss_le_r_mte,
5266 gen_helper_sve_ldnf1sdu_le_r_mte,
5267
5268 gen_helper_sve_ldnf1bds_r_mte,
5269 gen_helper_sve_ldnf1bss_r_mte,
5270 gen_helper_sve_ldnf1bhs_r_mte,
5271 gen_helper_sve_ldnf1dd_le_r_mte },
5272
5273 /* mte inactive, big-endian */
5274 { gen_helper_sve_ldnf1bb_r_mte,
5275 gen_helper_sve_ldnf1bhu_r_mte,
5276 gen_helper_sve_ldnf1bsu_r_mte,
5277 gen_helper_sve_ldnf1bdu_r_mte,
5278
5279 gen_helper_sve_ldnf1sds_be_r_mte,
5280 gen_helper_sve_ldnf1hh_be_r_mte,
5281 gen_helper_sve_ldnf1hsu_be_r_mte,
5282 gen_helper_sve_ldnf1hdu_be_r_mte,
5283
5284 gen_helper_sve_ldnf1hds_be_r_mte,
5285 gen_helper_sve_ldnf1hss_be_r_mte,
5286 gen_helper_sve_ldnf1ss_be_r_mte,
5287 gen_helper_sve_ldnf1sdu_be_r_mte,
5288
5289 gen_helper_sve_ldnf1bds_r_mte,
5290 gen_helper_sve_ldnf1bss_r_mte,
5291 gen_helper_sve_ldnf1bhs_r_mte,
5292 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
5293 };
5294
5295 if (sve_access_check(s)) {
5296 int vsz = vec_full_reg_size(s);
5297 int elements = vsz >> dtype_esz[a->dtype];
5298 int off = (a->imm * elements) << dtype_msz(a->dtype);
5299 TCGv_i64 addr = new_tmp_a64(s);
5300
5301 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
5302 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5303 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
5304 }
5305 return true;
5306}
1a039c7e 5307
c182c6db 5308static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
05abe304 5309{
05abe304
RH
5310 unsigned vsz = vec_full_reg_size(s);
5311 TCGv_ptr t_pg;
7924d239 5312 int poff;
05abe304
RH
5313
5314 /* Load the first quadword using the normal predicated load helpers. */
2a99ab2b
RH
5315 poff = pred_full_reg_offset(s, pg);
5316 if (vsz > 16) {
5317 /*
5318 * Zero-extend the first 16 bits of the predicate into a temporary.
5319 * This avoids triggering an assert making sure we don't have bits
5320 * set within a predicate beyond VQ, but we have lowered VQ to 1
5321 * for this load operation.
5322 */
5323 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5324#if HOST_BIG_ENDIAN
2a99ab2b
RH
5325 poff += 6;
5326#endif
5327 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5328
5329 poff = offsetof(CPUARMState, vfp.preg_tmp);
5330 tcg_gen_st_i64(tmp, cpu_env, poff);
5331 tcg_temp_free_i64(tmp);
5332 }
5333
05abe304 5334 t_pg = tcg_temp_new_ptr();
2a99ab2b 5335 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 5336
c182c6db
RH
5337 gen_helper_gvec_mem *fn
5338 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
7924d239 5339 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
05abe304
RH
5340
5341 tcg_temp_free_ptr(t_pg);
05abe304
RH
5342
5343 /* Replicate that first quadword. */
5344 if (vsz > 16) {
7924d239
RH
5345 int doff = vec_full_reg_offset(s, zt);
5346 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
05abe304
RH
5347 }
5348}
5349
3a7be554 5350static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5351{
5352 if (a->rm == 31) {
5353 return false;
5354 }
5355 if (sve_access_check(s)) {
5356 int msz = dtype_msz(a->dtype);
5357 TCGv_i64 addr = new_tmp_a64(s);
5358 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5359 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5360 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5361 }
5362 return true;
5363}
5364
3a7be554 5365static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5366{
5367 if (sve_access_check(s)) {
5368 TCGv_i64 addr = new_tmp_a64(s);
5369 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5370 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5371 }
5372 return true;
5373}
5374
12c563f6
RH
5375static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
5376{
5377 unsigned vsz = vec_full_reg_size(s);
5378 unsigned vsz_r32;
5379 TCGv_ptr t_pg;
5380 int poff, doff;
5381
5382 if (vsz < 32) {
5383 /*
5384 * Note that this UNDEFINED check comes after CheckSVEEnabled()
5385 * in the ARM pseudocode, which is the sve_access_check() done
5386 * in our caller. We should not now return false from the caller.
5387 */
5388 unallocated_encoding(s);
5389 return;
5390 }
5391
5392 /* Load the first octaword using the normal predicated load helpers. */
5393
5394 poff = pred_full_reg_offset(s, pg);
5395 if (vsz > 32) {
5396 /*
5397 * Zero-extend the first 32 bits of the predicate into a temporary.
5398 * This avoids triggering an assert making sure we don't have bits
5399 * set within a predicate beyond VQ, but we have lowered VQ to 2
5400 * for this load operation.
5401 */
5402 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5403#if HOST_BIG_ENDIAN
12c563f6
RH
5404 poff += 4;
5405#endif
5406 tcg_gen_ld32u_i64(tmp, cpu_env, poff);
5407
5408 poff = offsetof(CPUARMState, vfp.preg_tmp);
5409 tcg_gen_st_i64(tmp, cpu_env, poff);
5410 tcg_temp_free_i64(tmp);
5411 }
5412
5413 t_pg = tcg_temp_new_ptr();
5414 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
5415
5416 gen_helper_gvec_mem *fn
5417 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
5418 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
5419
5420 tcg_temp_free_ptr(t_pg);
5421
5422 /*
5423 * Replicate that first octaword.
5424 * The replication happens in units of 32; if the full vector size
5425 * is not a multiple of 32, the final bits are zeroed.
5426 */
5427 doff = vec_full_reg_offset(s, zt);
5428 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
5429 if (vsz >= 64) {
5430 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
5431 }
5432 vsz -= vsz_r32;
5433 if (vsz) {
5434 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
5435 }
5436}
5437
5438static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5439{
5440 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5441 return false;
5442 }
5443 if (a->rm == 31) {
5444 return false;
5445 }
5446 if (sve_access_check(s)) {
5447 TCGv_i64 addr = new_tmp_a64(s);
5448 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5449 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5450 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5451 }
5452 return true;
5453}
5454
5455static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5456{
5457 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5458 return false;
5459 }
5460 if (sve_access_check(s)) {
5461 TCGv_i64 addr = new_tmp_a64(s);
5462 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5463 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5464 }
5465 return true;
5466}
5467
68459864 5468/* Load and broadcast element. */
3a7be554 5469static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 5470{
68459864
RH
5471 unsigned vsz = vec_full_reg_size(s);
5472 unsigned psz = pred_full_reg_size(s);
5473 unsigned esz = dtype_esz[a->dtype];
d0e372b0 5474 unsigned msz = dtype_msz(a->dtype);
c0ed9166 5475 TCGLabel *over;
4ac430e1 5476 TCGv_i64 temp, clean_addr;
68459864 5477
c0ed9166
RH
5478 if (!sve_access_check(s)) {
5479 return true;
5480 }
5481
5482 over = gen_new_label();
5483
68459864
RH
5484 /* If the guarding predicate has no bits set, no load occurs. */
5485 if (psz <= 8) {
5486 /* Reduce the pred_esz_masks value simply to reduce the
5487 * size of the code generated here.
5488 */
5489 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5490 temp = tcg_temp_new_i64();
5491 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5492 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5493 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5494 tcg_temp_free_i64(temp);
5495 } else {
5496 TCGv_i32 t32 = tcg_temp_new_i32();
5497 find_last_active(s, t32, esz, a->pg);
5498 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5499 tcg_temp_free_i32(t32);
5500 }
5501
5502 /* Load the data. */
5503 temp = tcg_temp_new_i64();
d0e372b0 5504 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5505 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5506
5507 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
0ca0f872 5508 finalize_memop(s, dtype_mop[a->dtype]));
68459864
RH
5509
5510 /* Broadcast to *all* elements. */
5511 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5512 vsz, vsz, temp);
5513 tcg_temp_free_i64(temp);
5514
5515 /* Zero the inactive elements. */
5516 gen_set_label(over);
60245996 5517 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5518}
5519
1a039c7e
RH
5520static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5521 int msz, int esz, int nreg)
5522{
71b9f394
RH
5523 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5524 { { { gen_helper_sve_st1bb_r,
5525 gen_helper_sve_st1bh_r,
5526 gen_helper_sve_st1bs_r,
5527 gen_helper_sve_st1bd_r },
5528 { NULL,
5529 gen_helper_sve_st1hh_le_r,
5530 gen_helper_sve_st1hs_le_r,
5531 gen_helper_sve_st1hd_le_r },
5532 { NULL, NULL,
5533 gen_helper_sve_st1ss_le_r,
5534 gen_helper_sve_st1sd_le_r },
5535 { NULL, NULL, NULL,
5536 gen_helper_sve_st1dd_le_r } },
5537 { { gen_helper_sve_st1bb_r,
5538 gen_helper_sve_st1bh_r,
5539 gen_helper_sve_st1bs_r,
5540 gen_helper_sve_st1bd_r },
5541 { NULL,
5542 gen_helper_sve_st1hh_be_r,
5543 gen_helper_sve_st1hs_be_r,
5544 gen_helper_sve_st1hd_be_r },
5545 { NULL, NULL,
5546 gen_helper_sve_st1ss_be_r,
5547 gen_helper_sve_st1sd_be_r },
5548 { NULL, NULL, NULL,
5549 gen_helper_sve_st1dd_be_r } } },
5550
5551 { { { gen_helper_sve_st1bb_r_mte,
5552 gen_helper_sve_st1bh_r_mte,
5553 gen_helper_sve_st1bs_r_mte,
5554 gen_helper_sve_st1bd_r_mte },
5555 { NULL,
5556 gen_helper_sve_st1hh_le_r_mte,
5557 gen_helper_sve_st1hs_le_r_mte,
5558 gen_helper_sve_st1hd_le_r_mte },
5559 { NULL, NULL,
5560 gen_helper_sve_st1ss_le_r_mte,
5561 gen_helper_sve_st1sd_le_r_mte },
5562 { NULL, NULL, NULL,
5563 gen_helper_sve_st1dd_le_r_mte } },
5564 { { gen_helper_sve_st1bb_r_mte,
5565 gen_helper_sve_st1bh_r_mte,
5566 gen_helper_sve_st1bs_r_mte,
5567 gen_helper_sve_st1bd_r_mte },
5568 { NULL,
5569 gen_helper_sve_st1hh_be_r_mte,
5570 gen_helper_sve_st1hs_be_r_mte,
5571 gen_helper_sve_st1hd_be_r_mte },
5572 { NULL, NULL,
5573 gen_helper_sve_st1ss_be_r_mte,
5574 gen_helper_sve_st1sd_be_r_mte },
5575 { NULL, NULL, NULL,
5576 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5577 };
71b9f394
RH
5578 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5579 { { { gen_helper_sve_st2bb_r,
5580 gen_helper_sve_st2hh_le_r,
5581 gen_helper_sve_st2ss_le_r,
5582 gen_helper_sve_st2dd_le_r },
5583 { gen_helper_sve_st3bb_r,
5584 gen_helper_sve_st3hh_le_r,
5585 gen_helper_sve_st3ss_le_r,
5586 gen_helper_sve_st3dd_le_r },
5587 { gen_helper_sve_st4bb_r,
5588 gen_helper_sve_st4hh_le_r,
5589 gen_helper_sve_st4ss_le_r,
5590 gen_helper_sve_st4dd_le_r } },
5591 { { gen_helper_sve_st2bb_r,
5592 gen_helper_sve_st2hh_be_r,
5593 gen_helper_sve_st2ss_be_r,
5594 gen_helper_sve_st2dd_be_r },
5595 { gen_helper_sve_st3bb_r,
5596 gen_helper_sve_st3hh_be_r,
5597 gen_helper_sve_st3ss_be_r,
5598 gen_helper_sve_st3dd_be_r },
5599 { gen_helper_sve_st4bb_r,
5600 gen_helper_sve_st4hh_be_r,
5601 gen_helper_sve_st4ss_be_r,
5602 gen_helper_sve_st4dd_be_r } } },
5603 { { { gen_helper_sve_st2bb_r_mte,
5604 gen_helper_sve_st2hh_le_r_mte,
5605 gen_helper_sve_st2ss_le_r_mte,
5606 gen_helper_sve_st2dd_le_r_mte },
5607 { gen_helper_sve_st3bb_r_mte,
5608 gen_helper_sve_st3hh_le_r_mte,
5609 gen_helper_sve_st3ss_le_r_mte,
5610 gen_helper_sve_st3dd_le_r_mte },
5611 { gen_helper_sve_st4bb_r_mte,
5612 gen_helper_sve_st4hh_le_r_mte,
5613 gen_helper_sve_st4ss_le_r_mte,
5614 gen_helper_sve_st4dd_le_r_mte } },
5615 { { gen_helper_sve_st2bb_r_mte,
5616 gen_helper_sve_st2hh_be_r_mte,
5617 gen_helper_sve_st2ss_be_r_mte,
5618 gen_helper_sve_st2dd_be_r_mte },
5619 { gen_helper_sve_st3bb_r_mte,
5620 gen_helper_sve_st3hh_be_r_mte,
5621 gen_helper_sve_st3ss_be_r_mte,
5622 gen_helper_sve_st3dd_be_r_mte },
5623 { gen_helper_sve_st4bb_r_mte,
5624 gen_helper_sve_st4hh_be_r_mte,
5625 gen_helper_sve_st4ss_be_r_mte,
5626 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5627 };
5628 gen_helper_gvec_mem *fn;
28d57f2d 5629 int be = s->be_data == MO_BE;
1a039c7e
RH
5630
5631 if (nreg == 0) {
5632 /* ST1 */
71b9f394
RH
5633 fn = fn_single[s->mte_active[0]][be][msz][esz];
5634 nreg = 1;
1a039c7e
RH
5635 } else {
5636 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5637 assert(msz == esz);
71b9f394 5638 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5639 }
5640 assert(fn != NULL);
71b9f394 5641 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5642}
5643
3a7be554 5644static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5645{
5646 if (a->rm == 31 || a->msz > a->esz) {
5647 return false;
5648 }
5649 if (sve_access_check(s)) {
5650 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5651 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5652 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5653 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5654 }
5655 return true;
5656}
5657
3a7be554 5658static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5659{
5660 if (a->msz > a->esz) {
5661 return false;
5662 }
5663 if (sve_access_check(s)) {
5664 int vsz = vec_full_reg_size(s);
5665 int elements = vsz >> a->esz;
5666 TCGv_i64 addr = new_tmp_a64(s);
5667
5668 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5669 (a->imm * elements * (a->nreg + 1)) << a->msz);
5670 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5671 }
5672 return true;
5673}
f6dbf62a
RH
5674
5675/*
5676 *** SVE gather loads / scatter stores
5677 */
5678
500d0484 5679static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5680 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5681 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5682{
5683 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5684 TCGv_ptr t_zm = tcg_temp_new_ptr();
5685 TCGv_ptr t_pg = tcg_temp_new_ptr();
5686 TCGv_ptr t_zt = tcg_temp_new_ptr();
d28d12f0 5687 int desc = 0;
500d0484 5688
d28d12f0
RH
5689 if (s->mte_active[0]) {
5690 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5691 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5692 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5693 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5694 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
5695 desc <<= SVE_MTEDESC_SHIFT;
5696 }
cdecb3fc 5697 desc = simd_desc(vsz, vsz, desc | scale);
f6dbf62a
RH
5698
5699 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5700 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5701 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
c6a59b55 5702 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
f6dbf62a
RH
5703
5704 tcg_temp_free_ptr(t_zt);
5705 tcg_temp_free_ptr(t_zm);
5706 tcg_temp_free_ptr(t_pg);
f6dbf62a
RH
5707}
5708
d28d12f0
RH
5709/* Indexed by [mte][be][ff][xs][u][msz]. */
5710static gen_helper_gvec_mem_scatter * const
5711gather_load_fn32[2][2][2][2][2][3] = {
5712 { /* MTE Inactive */
5713 { /* Little-endian */
5714 { { { gen_helper_sve_ldbss_zsu,
5715 gen_helper_sve_ldhss_le_zsu,
5716 NULL, },
5717 { gen_helper_sve_ldbsu_zsu,
5718 gen_helper_sve_ldhsu_le_zsu,
5719 gen_helper_sve_ldss_le_zsu, } },
5720 { { gen_helper_sve_ldbss_zss,
5721 gen_helper_sve_ldhss_le_zss,
5722 NULL, },
5723 { gen_helper_sve_ldbsu_zss,
5724 gen_helper_sve_ldhsu_le_zss,
5725 gen_helper_sve_ldss_le_zss, } } },
5726
5727 /* First-fault */
5728 { { { gen_helper_sve_ldffbss_zsu,
5729 gen_helper_sve_ldffhss_le_zsu,
5730 NULL, },
5731 { gen_helper_sve_ldffbsu_zsu,
5732 gen_helper_sve_ldffhsu_le_zsu,
5733 gen_helper_sve_ldffss_le_zsu, } },
5734 { { gen_helper_sve_ldffbss_zss,
5735 gen_helper_sve_ldffhss_le_zss,
5736 NULL, },
5737 { gen_helper_sve_ldffbsu_zss,
5738 gen_helper_sve_ldffhsu_le_zss,
5739 gen_helper_sve_ldffss_le_zss, } } } },
5740
5741 { /* Big-endian */
5742 { { { gen_helper_sve_ldbss_zsu,
5743 gen_helper_sve_ldhss_be_zsu,
5744 NULL, },
5745 { gen_helper_sve_ldbsu_zsu,
5746 gen_helper_sve_ldhsu_be_zsu,
5747 gen_helper_sve_ldss_be_zsu, } },
5748 { { gen_helper_sve_ldbss_zss,
5749 gen_helper_sve_ldhss_be_zss,
5750 NULL, },
5751 { gen_helper_sve_ldbsu_zss,
5752 gen_helper_sve_ldhsu_be_zss,
5753 gen_helper_sve_ldss_be_zss, } } },
5754
5755 /* First-fault */
5756 { { { gen_helper_sve_ldffbss_zsu,
5757 gen_helper_sve_ldffhss_be_zsu,
5758 NULL, },
5759 { gen_helper_sve_ldffbsu_zsu,
5760 gen_helper_sve_ldffhsu_be_zsu,
5761 gen_helper_sve_ldffss_be_zsu, } },
5762 { { gen_helper_sve_ldffbss_zss,
5763 gen_helper_sve_ldffhss_be_zss,
5764 NULL, },
5765 { gen_helper_sve_ldffbsu_zss,
5766 gen_helper_sve_ldffhsu_be_zss,
5767 gen_helper_sve_ldffss_be_zss, } } } } },
5768 { /* MTE Active */
5769 { /* Little-endian */
5770 { { { gen_helper_sve_ldbss_zsu_mte,
5771 gen_helper_sve_ldhss_le_zsu_mte,
5772 NULL, },
5773 { gen_helper_sve_ldbsu_zsu_mte,
5774 gen_helper_sve_ldhsu_le_zsu_mte,
5775 gen_helper_sve_ldss_le_zsu_mte, } },
5776 { { gen_helper_sve_ldbss_zss_mte,
5777 gen_helper_sve_ldhss_le_zss_mte,
5778 NULL, },
5779 { gen_helper_sve_ldbsu_zss_mte,
5780 gen_helper_sve_ldhsu_le_zss_mte,
5781 gen_helper_sve_ldss_le_zss_mte, } } },
5782
5783 /* First-fault */
5784 { { { gen_helper_sve_ldffbss_zsu_mte,
5785 gen_helper_sve_ldffhss_le_zsu_mte,
5786 NULL, },
5787 { gen_helper_sve_ldffbsu_zsu_mte,
5788 gen_helper_sve_ldffhsu_le_zsu_mte,
5789 gen_helper_sve_ldffss_le_zsu_mte, } },
5790 { { gen_helper_sve_ldffbss_zss_mte,
5791 gen_helper_sve_ldffhss_le_zss_mte,
5792 NULL, },
5793 { gen_helper_sve_ldffbsu_zss_mte,
5794 gen_helper_sve_ldffhsu_le_zss_mte,
5795 gen_helper_sve_ldffss_le_zss_mte, } } } },
5796
5797 { /* Big-endian */
5798 { { { gen_helper_sve_ldbss_zsu_mte,
5799 gen_helper_sve_ldhss_be_zsu_mte,
5800 NULL, },
5801 { gen_helper_sve_ldbsu_zsu_mte,
5802 gen_helper_sve_ldhsu_be_zsu_mte,
5803 gen_helper_sve_ldss_be_zsu_mte, } },
5804 { { gen_helper_sve_ldbss_zss_mte,
5805 gen_helper_sve_ldhss_be_zss_mte,
5806 NULL, },
5807 { gen_helper_sve_ldbsu_zss_mte,
5808 gen_helper_sve_ldhsu_be_zss_mte,
5809 gen_helper_sve_ldss_be_zss_mte, } } },
5810
5811 /* First-fault */
5812 { { { gen_helper_sve_ldffbss_zsu_mte,
5813 gen_helper_sve_ldffhss_be_zsu_mte,
5814 NULL, },
5815 { gen_helper_sve_ldffbsu_zsu_mte,
5816 gen_helper_sve_ldffhsu_be_zsu_mte,
5817 gen_helper_sve_ldffss_be_zsu_mte, } },
5818 { { gen_helper_sve_ldffbss_zss_mte,
5819 gen_helper_sve_ldffhss_be_zss_mte,
5820 NULL, },
5821 { gen_helper_sve_ldffbsu_zss_mte,
5822 gen_helper_sve_ldffhsu_be_zss_mte,
5823 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5824};
5825
5826/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5827static gen_helper_gvec_mem_scatter * const
5828gather_load_fn64[2][2][2][3][2][4] = {
5829 { /* MTE Inactive */
5830 { /* Little-endian */
5831 { { { gen_helper_sve_ldbds_zsu,
5832 gen_helper_sve_ldhds_le_zsu,
5833 gen_helper_sve_ldsds_le_zsu,
5834 NULL, },
5835 { gen_helper_sve_ldbdu_zsu,
5836 gen_helper_sve_ldhdu_le_zsu,
5837 gen_helper_sve_ldsdu_le_zsu,
5838 gen_helper_sve_lddd_le_zsu, } },
5839 { { gen_helper_sve_ldbds_zss,
5840 gen_helper_sve_ldhds_le_zss,
5841 gen_helper_sve_ldsds_le_zss,
5842 NULL, },
5843 { gen_helper_sve_ldbdu_zss,
5844 gen_helper_sve_ldhdu_le_zss,
5845 gen_helper_sve_ldsdu_le_zss,
5846 gen_helper_sve_lddd_le_zss, } },
5847 { { gen_helper_sve_ldbds_zd,
5848 gen_helper_sve_ldhds_le_zd,
5849 gen_helper_sve_ldsds_le_zd,
5850 NULL, },
5851 { gen_helper_sve_ldbdu_zd,
5852 gen_helper_sve_ldhdu_le_zd,
5853 gen_helper_sve_ldsdu_le_zd,
5854 gen_helper_sve_lddd_le_zd, } } },
5855
5856 /* First-fault */
5857 { { { gen_helper_sve_ldffbds_zsu,
5858 gen_helper_sve_ldffhds_le_zsu,
5859 gen_helper_sve_ldffsds_le_zsu,
5860 NULL, },
5861 { gen_helper_sve_ldffbdu_zsu,
5862 gen_helper_sve_ldffhdu_le_zsu,
5863 gen_helper_sve_ldffsdu_le_zsu,
5864 gen_helper_sve_ldffdd_le_zsu, } },
5865 { { gen_helper_sve_ldffbds_zss,
5866 gen_helper_sve_ldffhds_le_zss,
5867 gen_helper_sve_ldffsds_le_zss,
5868 NULL, },
5869 { gen_helper_sve_ldffbdu_zss,
5870 gen_helper_sve_ldffhdu_le_zss,
5871 gen_helper_sve_ldffsdu_le_zss,
5872 gen_helper_sve_ldffdd_le_zss, } },
5873 { { gen_helper_sve_ldffbds_zd,
5874 gen_helper_sve_ldffhds_le_zd,
5875 gen_helper_sve_ldffsds_le_zd,
5876 NULL, },
5877 { gen_helper_sve_ldffbdu_zd,
5878 gen_helper_sve_ldffhdu_le_zd,
5879 gen_helper_sve_ldffsdu_le_zd,
5880 gen_helper_sve_ldffdd_le_zd, } } } },
5881 { /* Big-endian */
5882 { { { gen_helper_sve_ldbds_zsu,
5883 gen_helper_sve_ldhds_be_zsu,
5884 gen_helper_sve_ldsds_be_zsu,
5885 NULL, },
5886 { gen_helper_sve_ldbdu_zsu,
5887 gen_helper_sve_ldhdu_be_zsu,
5888 gen_helper_sve_ldsdu_be_zsu,
5889 gen_helper_sve_lddd_be_zsu, } },
5890 { { gen_helper_sve_ldbds_zss,
5891 gen_helper_sve_ldhds_be_zss,
5892 gen_helper_sve_ldsds_be_zss,
5893 NULL, },
5894 { gen_helper_sve_ldbdu_zss,
5895 gen_helper_sve_ldhdu_be_zss,
5896 gen_helper_sve_ldsdu_be_zss,
5897 gen_helper_sve_lddd_be_zss, } },
5898 { { gen_helper_sve_ldbds_zd,
5899 gen_helper_sve_ldhds_be_zd,
5900 gen_helper_sve_ldsds_be_zd,
5901 NULL, },
5902 { gen_helper_sve_ldbdu_zd,
5903 gen_helper_sve_ldhdu_be_zd,
5904 gen_helper_sve_ldsdu_be_zd,
5905 gen_helper_sve_lddd_be_zd, } } },
5906
5907 /* First-fault */
5908 { { { gen_helper_sve_ldffbds_zsu,
5909 gen_helper_sve_ldffhds_be_zsu,
5910 gen_helper_sve_ldffsds_be_zsu,
5911 NULL, },
5912 { gen_helper_sve_ldffbdu_zsu,
5913 gen_helper_sve_ldffhdu_be_zsu,
5914 gen_helper_sve_ldffsdu_be_zsu,
5915 gen_helper_sve_ldffdd_be_zsu, } },
5916 { { gen_helper_sve_ldffbds_zss,
5917 gen_helper_sve_ldffhds_be_zss,
5918 gen_helper_sve_ldffsds_be_zss,
5919 NULL, },
5920 { gen_helper_sve_ldffbdu_zss,
5921 gen_helper_sve_ldffhdu_be_zss,
5922 gen_helper_sve_ldffsdu_be_zss,
5923 gen_helper_sve_ldffdd_be_zss, } },
5924 { { gen_helper_sve_ldffbds_zd,
5925 gen_helper_sve_ldffhds_be_zd,
5926 gen_helper_sve_ldffsds_be_zd,
5927 NULL, },
5928 { gen_helper_sve_ldffbdu_zd,
5929 gen_helper_sve_ldffhdu_be_zd,
5930 gen_helper_sve_ldffsdu_be_zd,
5931 gen_helper_sve_ldffdd_be_zd, } } } } },
5932 { /* MTE Active */
5933 { /* Little-endian */
5934 { { { gen_helper_sve_ldbds_zsu_mte,
5935 gen_helper_sve_ldhds_le_zsu_mte,
5936 gen_helper_sve_ldsds_le_zsu_mte,
5937 NULL, },
5938 { gen_helper_sve_ldbdu_zsu_mte,
5939 gen_helper_sve_ldhdu_le_zsu_mte,
5940 gen_helper_sve_ldsdu_le_zsu_mte,
5941 gen_helper_sve_lddd_le_zsu_mte, } },
5942 { { gen_helper_sve_ldbds_zss_mte,
5943 gen_helper_sve_ldhds_le_zss_mte,
5944 gen_helper_sve_ldsds_le_zss_mte,
5945 NULL, },
5946 { gen_helper_sve_ldbdu_zss_mte,
5947 gen_helper_sve_ldhdu_le_zss_mte,
5948 gen_helper_sve_ldsdu_le_zss_mte,
5949 gen_helper_sve_lddd_le_zss_mte, } },
5950 { { gen_helper_sve_ldbds_zd_mte,
5951 gen_helper_sve_ldhds_le_zd_mte,
5952 gen_helper_sve_ldsds_le_zd_mte,
5953 NULL, },
5954 { gen_helper_sve_ldbdu_zd_mte,
5955 gen_helper_sve_ldhdu_le_zd_mte,
5956 gen_helper_sve_ldsdu_le_zd_mte,
5957 gen_helper_sve_lddd_le_zd_mte, } } },
5958
5959 /* First-fault */
5960 { { { gen_helper_sve_ldffbds_zsu_mte,
5961 gen_helper_sve_ldffhds_le_zsu_mte,
5962 gen_helper_sve_ldffsds_le_zsu_mte,
5963 NULL, },
5964 { gen_helper_sve_ldffbdu_zsu_mte,
5965 gen_helper_sve_ldffhdu_le_zsu_mte,
5966 gen_helper_sve_ldffsdu_le_zsu_mte,
5967 gen_helper_sve_ldffdd_le_zsu_mte, } },
5968 { { gen_helper_sve_ldffbds_zss_mte,
5969 gen_helper_sve_ldffhds_le_zss_mte,
5970 gen_helper_sve_ldffsds_le_zss_mte,
5971 NULL, },
5972 { gen_helper_sve_ldffbdu_zss_mte,
5973 gen_helper_sve_ldffhdu_le_zss_mte,
5974 gen_helper_sve_ldffsdu_le_zss_mte,
5975 gen_helper_sve_ldffdd_le_zss_mte, } },
5976 { { gen_helper_sve_ldffbds_zd_mte,
5977 gen_helper_sve_ldffhds_le_zd_mte,
5978 gen_helper_sve_ldffsds_le_zd_mte,
5979 NULL, },
5980 { gen_helper_sve_ldffbdu_zd_mte,
5981 gen_helper_sve_ldffhdu_le_zd_mte,
5982 gen_helper_sve_ldffsdu_le_zd_mte,
5983 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5984 { /* Big-endian */
5985 { { { gen_helper_sve_ldbds_zsu_mte,
5986 gen_helper_sve_ldhds_be_zsu_mte,
5987 gen_helper_sve_ldsds_be_zsu_mte,
5988 NULL, },
5989 { gen_helper_sve_ldbdu_zsu_mte,
5990 gen_helper_sve_ldhdu_be_zsu_mte,
5991 gen_helper_sve_ldsdu_be_zsu_mte,
5992 gen_helper_sve_lddd_be_zsu_mte, } },
5993 { { gen_helper_sve_ldbds_zss_mte,
5994 gen_helper_sve_ldhds_be_zss_mte,
5995 gen_helper_sve_ldsds_be_zss_mte,
5996 NULL, },
5997 { gen_helper_sve_ldbdu_zss_mte,
5998 gen_helper_sve_ldhdu_be_zss_mte,
5999 gen_helper_sve_ldsdu_be_zss_mte,
6000 gen_helper_sve_lddd_be_zss_mte, } },
6001 { { gen_helper_sve_ldbds_zd_mte,
6002 gen_helper_sve_ldhds_be_zd_mte,
6003 gen_helper_sve_ldsds_be_zd_mte,
6004 NULL, },
6005 { gen_helper_sve_ldbdu_zd_mte,
6006 gen_helper_sve_ldhdu_be_zd_mte,
6007 gen_helper_sve_ldsdu_be_zd_mte,
6008 gen_helper_sve_lddd_be_zd_mte, } } },
6009
6010 /* First-fault */
6011 { { { gen_helper_sve_ldffbds_zsu_mte,
6012 gen_helper_sve_ldffhds_be_zsu_mte,
6013 gen_helper_sve_ldffsds_be_zsu_mte,
6014 NULL, },
6015 { gen_helper_sve_ldffbdu_zsu_mte,
6016 gen_helper_sve_ldffhdu_be_zsu_mte,
6017 gen_helper_sve_ldffsdu_be_zsu_mte,
6018 gen_helper_sve_ldffdd_be_zsu_mte, } },
6019 { { gen_helper_sve_ldffbds_zss_mte,
6020 gen_helper_sve_ldffhds_be_zss_mte,
6021 gen_helper_sve_ldffsds_be_zss_mte,
6022 NULL, },
6023 { gen_helper_sve_ldffbdu_zss_mte,
6024 gen_helper_sve_ldffhdu_be_zss_mte,
6025 gen_helper_sve_ldffsdu_be_zss_mte,
6026 gen_helper_sve_ldffdd_be_zss_mte, } },
6027 { { gen_helper_sve_ldffbds_zd_mte,
6028 gen_helper_sve_ldffhds_be_zd_mte,
6029 gen_helper_sve_ldffsds_be_zd_mte,
6030 NULL, },
6031 { gen_helper_sve_ldffbdu_zd_mte,
6032 gen_helper_sve_ldffhdu_be_zd_mte,
6033 gen_helper_sve_ldffsdu_be_zd_mte,
6034 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
6035};
6036
3a7be554 6037static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
6038{
6039 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6040 bool be = s->be_data == MO_BE;
6041 bool mte = s->mte_active[0];
673e9fa6
RH
6042
6043 if (!sve_access_check(s)) {
6044 return true;
6045 }
6046
6047 switch (a->esz) {
6048 case MO_32:
d28d12f0 6049 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6050 break;
6051 case MO_64:
d28d12f0 6052 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6053 break;
6054 }
6055 assert(fn != NULL);
6056
6057 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6058 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
6059 return true;
6060}
6061
3a7be554 6062static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
6063{
6064 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6065 bool be = s->be_data == MO_BE;
6066 bool mte = s->mte_active[0];
673e9fa6
RH
6067
6068 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
6069 return false;
6070 }
6071 if (!sve_access_check(s)) {
6072 return true;
6073 }
6074
6075 switch (a->esz) {
6076 case MO_32:
d28d12f0 6077 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
6078 break;
6079 case MO_64:
d28d12f0 6080 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
6081 break;
6082 }
6083 assert(fn != NULL);
6084
6085 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
6086 * by loading the immediate into the scalar parameter.
6087 */
2ccdf94f
RH
6088 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6089 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
6090 return true;
6091}
6092
cf327449
SL
6093static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
6094{
b17ab470
RH
6095 gen_helper_gvec_mem_scatter *fn = NULL;
6096 bool be = s->be_data == MO_BE;
6097 bool mte = s->mte_active[0];
6098
6099 if (a->esz < a->msz + !a->u) {
6100 return false;
6101 }
cf327449
SL
6102 if (!dc_isar_feature(aa64_sve2, s)) {
6103 return false;
6104 }
b17ab470
RH
6105 if (!sve_access_check(s)) {
6106 return true;
6107 }
6108
6109 switch (a->esz) {
6110 case MO_32:
6111 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
6112 break;
6113 case MO_64:
6114 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
6115 break;
6116 }
6117 assert(fn != NULL);
6118
6119 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6120 cpu_reg(s, a->rm), a->msz, false, fn);
6121 return true;
cf327449
SL
6122}
6123
/*
 * Scatter-store helpers for 32-bit elements.
 * Indexed by [mte][be][xs][msz]:
 *   mte - MTE active, be - big-endian,
 *   xs  - offset form (0 = zsu, 1 = zss helper variants),
 *   msz - log2 of the memory element size.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
6157
/*
 * Scatter-store helpers for 64-bit elements, indexed [mte][be][xs][msz].
 * Note that we overload xs=2 to indicate 64-bit offset (the zd variants).
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */    /* was mislabelled "MTE Inactive": these are the _mte helpers */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
6215
3a7be554 6216static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 6217{
f6dbf62a 6218 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
6219 bool be = s->be_data == MO_BE;
6220 bool mte = s->mte_active[0];
f6dbf62a
RH
6221
6222 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6223 return false;
6224 }
6225 if (!sve_access_check(s)) {
6226 return true;
6227 }
6228 switch (a->esz) {
6229 case MO_32:
d28d12f0 6230 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
6231 break;
6232 case MO_64:
d28d12f0 6233 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
6234 break;
6235 default:
6236 g_assert_not_reached();
6237 }
6238 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6239 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
6240 return true;
6241}
dec6cf6b 6242
3a7be554 6243static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
6244{
6245 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6246 bool be = s->be_data == MO_BE;
6247 bool mte = s->mte_active[0];
408ecde9
RH
6248
6249 if (a->esz < a->msz) {
6250 return false;
6251 }
6252 if (!sve_access_check(s)) {
6253 return true;
6254 }
6255
6256 switch (a->esz) {
6257 case MO_32:
d28d12f0 6258 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
6259 break;
6260 case MO_64:
d28d12f0 6261 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
6262 break;
6263 }
6264 assert(fn != NULL);
6265
6266 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6267 * by loading the immediate into the scalar parameter.
6268 */
2ccdf94f
RH
6269 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6270 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
6271 return true;
6272}
6273
6ebca45f
SL
6274static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
6275{
b17ab470
RH
6276 gen_helper_gvec_mem_scatter *fn;
6277 bool be = s->be_data == MO_BE;
6278 bool mte = s->mte_active[0];
6279
6280 if (a->esz < a->msz) {
6281 return false;
6282 }
6ebca45f
SL
6283 if (!dc_isar_feature(aa64_sve2, s)) {
6284 return false;
6285 }
b17ab470
RH
6286 if (!sve_access_check(s)) {
6287 return true;
6288 }
6289
6290 switch (a->esz) {
6291 case MO_32:
6292 fn = scatter_store_fn32[mte][be][0][a->msz];
6293 break;
6294 case MO_64:
6295 fn = scatter_store_fn64[mte][be][2][a->msz];
6296 break;
6297 default:
6298 g_assert_not_reached();
6299 }
6300
6301 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6302 cpu_reg(s, a->rm), a->msz, true, fn);
6303 return true;
6ebca45f
SL
6304}
6305
/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /*
     * Prefetch is a nop within QEMU.  We still perform (and discard the
     * result of) the SVE access check so its usual side effects occur.
     */
    (void)sve_access_check(s);
    return true;
}
6316
static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    /* Reject rm == 31; returning false routes this to the invalid path.  */
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}
a2103582
RH
6326
6327/*
6328 * Move Prefix
6329 *
6330 * TODO: The implementation so far could handle predicated merging movprfx.
6331 * The helper functions as written take an extra source register to
6332 * use in the operation, but the result is only written when predication
6333 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
6334 * to allow the final write back to the destination to be unconditional.
6335 * For predicated zeroing movprfx, we need to rearrange the helpers to
6336 * allow the final write back to zero inactives.
6337 *
6338 * In the meantime, just emit the moves.
6339 */
6340
/* MOVPRFX (unpredicated): emitted as a plain vector move.  */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}

/* MOVPRFX (predicated, merging): rn where pg is set, else keep rd.  */
static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
{
    return do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
}

/* MOVPRFX (predicated, zeroing): rn where pg is set, zero elsewhere.  */
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
5dad1ba5
RH
/*
 * SVE2 Integer Multiply - Unpredicated
 */

/* MUL (vectors) expands inline via the generic gvec multiply.  */
TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)

/* Signed multiply-returning-high-half, one helper per element size.  */
static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

/* PMUL has only a byte form; a single helper suffices.  */
TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6392
/*
 * SVE2 Integer - Predicated
 */

/* Pairwise widening accumulate; no byte form (fns[0] == NULL rejects it). */
static gen_helper_gvec_4 * const sadlp_fns[4] = {
    NULL, gen_helper_sve2_sadalp_zpzz_h,
    gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
};
TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           sadlp_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const uadlp_fns[4] = {
    NULL, gen_helper_sve2_uadalp_zpzz_h,
    gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
};
TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           uadlp_fns[a->esz], a, 0)
/*
 * SVE2 integer unary operations (predicated)
 */

/* URECPE/URSQRTE exist only for 32-bit elements; other sizes pass NULL.  */
TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)

static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
/*
 * SVE2 predicated two-operand integer ops.  DO_ZPZZ (defined earlier
 * in this file) expands to the trans_* function for each mnemonic,
 * wiring in the per-element-size sve2_* helpers.
 */

/* Saturating and rounding shifts.  */
DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

/* Halving add/sub (signed).  */
DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

/* Halving add/sub (unsigned).  */
DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

/* Pairwise arithmetic.  */
DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

/* Saturating add/sub.  */
DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
/*
 * SVE2 Widening Integer Arithmetic
 *
 * Most helper tables below have no byte form (entry 0 is NULL, which
 * the expander rejects).  The trailing data argument of TRANS_FEAT
 * appears to select which (bottom/top) halves of the two source
 * operands the helper consumes -- see the helper implementations.
 */

static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL, gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL, gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL, gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL, gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL, gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL, gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL, gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL, gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL, gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

/* EORBT/EORTB share one interleaving-eor helper; all element sizes.  */
static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
e3a56131
RH
6560static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6561{
6562 static gen_helper_gvec_3 * const fns[4] = {
6563 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6564 NULL, gen_helper_sve2_pmull_d,
6565 };
6566 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
6567 return false;
6568 }
615f19fe 6569 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
e3a56131
RH
6570}
6571
TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)

/* Widening add/sub with a wide first operand; no byte forms.  */
static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL, gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL, gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL, gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL, gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
/*
 * Vector expansion for SSHLL[BT]: widening signed shift-left-long.
 * imm packs (shift << 1) | top, as built by do_sve2_shll_tb below.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;          /* bit 0: operate on the top (high) halves */
    int shl = imm >> 1;         /* remaining bits: left shift amount */
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            /*
             * Shift exactly restores the original position: just mask
             * away the low half, keeping the high bits in place.
             */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Sign-extend the high half downward, then shift left.  */
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        /* Move the low half up, then arithmetic-shift back down.  */
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}
6624
6625static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
6626{
6627 int halfbits = 4 << vece;
6628 int top = imm & 1;
6629 int shl = (imm >> 1);
6630 int shift;
6631 uint64_t mask;
6632
6633 mask = MAKE_64BIT_MASK(0, halfbits);
6634 mask <<= shl;
6635 mask = dup_const(vece, mask);
6636
6637 shift = shl - top * halfbits;
6638 if (shift < 0) {
6639 tcg_gen_shri_i64(d, n, -shift);
6640 } else {
6641 tcg_gen_shli_i64(d, n, shift);
6642 }
6643 tcg_gen_andi_i64(d, d, mask);
6644}
6645
/* Fixed-element-size wrappers for use as GVecGen2i.fni8 callbacks.  */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
6660
/*
 * Vector expansion for USHLL[BT]; imm packs (shift << 1) | top.
 * Mirrors gen_sshll_vec but zero- rather than sign-extends.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            /* High half already in final position: mask off the low half. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Zero-extend the high half downward, then shift left.  */
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            /* No shift: zero-extension is just a mask of the low half.  */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Move the low half up, then logical-shift back down.  */
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
6689
/*
 * Common expansion for [US]SHLL[BT].  The immediate handed to the
 * GVecGen2i expanders packs (a->imm << 1) | sel, decoded again by
 * gen_[su]shll_vec / gen_ushll*_i64 above.  'uns' selects the
 * unsigned table; 'sel' selects the top-half (T) form.
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    /* a->esz in [0, 2] selects the MO_16/MO_32/MO_64 expander.  */
    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}
6741
/* [US]SHLL[BT]: sel picks the top form, uns picks the unsigned table.  */
static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}

static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}

static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}

static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
cb9c33b8 6761
/* Bit-permute group: gated on the SVE2 BitPerm feature.  */
static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bgrp_fns[a->esz], a, 0)

/* Complex integer add: the data argument distinguishes rot90/rot270.  */
static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)
38650638 6800
/* Widening absolute-difference-accumulate; no byte form (NULL entry).  */
static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL, gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL, gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
/* ADCLB/ADCLT: add/subtract-with-carry long; sel picks the top form.  */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}

TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e
RH
6830
6831static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
6832{
ada378f0 6833 if (!dc_isar_feature(aa64_sve2, s)) {
a7e3a90e
RH
6834 return false;
6835 }
ada378f0 6836 return gen_gvec_fn_arg_zzi(s, fn, a);
a7e3a90e
RH
6837}
6838
/*
 * Shift-right accumulate (SSRA/USRA/SRSRA/URSRA) and shift-and-insert
 * (SRI/SLI), all via the shared do_sve2_fn2i wrapper.
 */
static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ssra);
}

static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_usra);
}

static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_srsra);
}

static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ursra);
}

static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sri);
}

static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sli);
}

/* Same-width absolute-difference-accumulate.  */
TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
5ff2838d
RH
6871
6872static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
6873 const GVecGen2 ops[3])
6874{
6875 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
6876 !dc_isar_feature(aa64_sve2, s)) {
6877 return false;
6878 }
6879 if (sve_access_check(s)) {
6880 unsigned vsz = vec_full_reg_size(s);
6881 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
6882 vec_full_reg_offset(s, a->rn),
6883 vsz, vsz, &ops[a->esz]);
6884 }
6885 return true;
6886}
6887
/* Vector opcodes required by the signed saturating narrows below.  */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/* SQXTNB expansion: clamp to the signed half-width range, keep low half. */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    /* Saturate each element into [min, max].  */
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    /* Keep only the low (bottom) half of each element.  */
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6927
/*
 * SQXTNT (vector expansion): saturate N to the signed half-width
 * range, shift the result into the high half of each element, and
 * merge: low halves keep D's old value, high halves take the
 * narrowed result.
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* bits in the narrowed half-element */
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    /* Clamp to the signed half-width range [min, max]. */
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    /* Position the result in the high half of each element. */
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* d = (d & mask) | (n & ~mask): keep bottoms, write tops. */
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
6945
6946static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
6947{
6948 static const GVecGen2 ops[3] = {
6949 { .fniv = gen_sqxtnt_vec,
6950 .opt_opc = sqxtn_list,
6951 .load_dest = true,
6952 .fno = gen_helper_sve2_sqxtnt_h,
6953 .vece = MO_16 },
6954 { .fniv = gen_sqxtnt_vec,
6955 .opt_opc = sqxtn_list,
6956 .load_dest = true,
6957 .fno = gen_helper_sve2_sqxtnt_s,
6958 .vece = MO_32 },
6959 { .fniv = gen_sqxtnt_vec,
6960 .opt_opc = sqxtn_list,
6961 .load_dest = true,
6962 .fno = gen_helper_sve2_sqxtnt_d,
6963 .vece = MO_64 },
6964 };
6965 return do_sve2_narrow_extract(s, a, ops);
6966}
6967
6968static const TCGOpcode uqxtn_list[] = {
6969 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
6970};
6971
/*
 * UQXTNB (vector expansion): unsigned saturate each element of N to
 * the half-width maximum; the umin both clamps and clears the high
 * half of each element in one step.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* bits in the narrowed half-element */
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
6982
6983static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
6984{
6985 static const GVecGen2 ops[3] = {
6986 { .fniv = gen_uqxtnb_vec,
6987 .opt_opc = uqxtn_list,
6988 .fno = gen_helper_sve2_uqxtnb_h,
6989 .vece = MO_16 },
6990 { .fniv = gen_uqxtnb_vec,
6991 .opt_opc = uqxtn_list,
6992 .fno = gen_helper_sve2_uqxtnb_s,
6993 .vece = MO_32 },
6994 { .fniv = gen_uqxtnb_vec,
6995 .opt_opc = uqxtn_list,
6996 .fno = gen_helper_sve2_uqxtnb_d,
6997 .vece = MO_64 },
6998 };
6999 return do_sve2_narrow_extract(s, a, ops);
7000}
7001
7002static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7003{
7004 TCGv_vec t = tcg_temp_new_vec_matching(d);
7005 int halfbits = 4 << vece;
7006 int64_t max = (1ull << halfbits) - 1;
7007
7008 tcg_gen_dupi_vec(vece, t, max);
7009 tcg_gen_umin_vec(vece, n, n, t);
7010 tcg_gen_shli_vec(vece, n, n, halfbits);
7011 tcg_gen_bitsel_vec(vece, d, t, d, n);
7012 tcg_temp_free_vec(t);
7013}
7014
7015static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
7016{
7017 static const GVecGen2 ops[3] = {
7018 { .fniv = gen_uqxtnt_vec,
7019 .opt_opc = uqxtn_list,
7020 .load_dest = true,
7021 .fno = gen_helper_sve2_uqxtnt_h,
7022 .vece = MO_16 },
7023 { .fniv = gen_uqxtnt_vec,
7024 .opt_opc = uqxtn_list,
7025 .load_dest = true,
7026 .fno = gen_helper_sve2_uqxtnt_s,
7027 .vece = MO_32 },
7028 { .fniv = gen_uqxtnt_vec,
7029 .opt_opc = uqxtn_list,
7030 .load_dest = true,
7031 .fno = gen_helper_sve2_uqxtnt_d,
7032 .vece = MO_64 },
7033 };
7034 return do_sve2_narrow_extract(s, a, ops);
7035}
7036
7037static const TCGOpcode sqxtun_list[] = {
7038 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
7039};
7040
/*
 * SQXTUNB (vector expansion): signed input saturated to the unsigned
 * half-width range [0, max]; smax clamps negatives to zero, umin
 * clamps the top and clears the high half of each element.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* bits in the narrowed half-element */
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}
7053
7054static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
7055{
7056 static const GVecGen2 ops[3] = {
7057 { .fniv = gen_sqxtunb_vec,
7058 .opt_opc = sqxtun_list,
7059 .fno = gen_helper_sve2_sqxtunb_h,
7060 .vece = MO_16 },
7061 { .fniv = gen_sqxtunb_vec,
7062 .opt_opc = sqxtun_list,
7063 .fno = gen_helper_sve2_sqxtunb_s,
7064 .vece = MO_32 },
7065 { .fniv = gen_sqxtunb_vec,
7066 .opt_opc = sqxtun_list,
7067 .fno = gen_helper_sve2_sqxtunb_d,
7068 .vece = MO_64 },
7069 };
7070 return do_sve2_narrow_extract(s, a, ops);
7071}
7072
7073static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7074{
7075 TCGv_vec t = tcg_temp_new_vec_matching(d);
7076 int halfbits = 4 << vece;
7077 int64_t max = (1ull << halfbits) - 1;
7078
7079 tcg_gen_dupi_vec(vece, t, 0);
7080 tcg_gen_smax_vec(vece, n, n, t);
7081 tcg_gen_dupi_vec(vece, t, max);
7082 tcg_gen_umin_vec(vece, n, n, t);
7083 tcg_gen_shli_vec(vece, n, n, halfbits);
7084 tcg_gen_bitsel_vec(vece, d, t, d, n);
7085 tcg_temp_free_vec(t);
7086}
7087
7088static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
7089{
7090 static const GVecGen2 ops[3] = {
7091 { .fniv = gen_sqxtunt_vec,
7092 .opt_opc = sqxtun_list,
7093 .load_dest = true,
7094 .fno = gen_helper_sve2_sqxtunt_h,
7095 .vece = MO_16 },
7096 { .fniv = gen_sqxtunt_vec,
7097 .opt_opc = sqxtun_list,
7098 .load_dest = true,
7099 .fno = gen_helper_sve2_sqxtunt_s,
7100 .vece = MO_32 },
7101 { .fniv = gen_sqxtunt_vec,
7102 .opt_opc = sqxtun_list,
7103 .load_dest = true,
7104 .fno = gen_helper_sve2_sqxtunt_d,
7105 .vece = MO_64 },
7106 };
7107 return do_sve2_narrow_extract(s, a, ops);
46d111b2
RH
7108}
7109
/*
 * Expand an SVE2 shift-right-and-narrow insn via the GVecGen2i table,
 * indexed by the narrowed element size.  The decoder guarantees the
 * shift amount is in [1, narrowed element width], hence the assert.
 */
static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
                               const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
7125
/*
 * SHRNB expansion on a 64-bit lane: shift all elements right, then
 * mask to keep only the low half of each element (dup_const
 * replicates the half-element mask across the 64-bit lane).
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}
7134
7135static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7136{
7137 gen_shrnb_i64(MO_16, d, n, shr);
7138}
7139
7140static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7141{
7142 gen_shrnb_i64(MO_32, d, n, shr);
7143}
7144
7145static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7146{
7147 gen_shrnb_i64(MO_64, d, n, shr);
7148}
7149
/*
 * SHRNB (vector expansion): shift each element right, then keep only
 * the low half of each element; high halves of D become zero.
 */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* bits in the narrowed half-element */
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7161
7162static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
7163{
7164 static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
7165 static const GVecGen2i ops[3] = {
7166 { .fni8 = gen_shrnb16_i64,
7167 .fniv = gen_shrnb_vec,
7168 .opt_opc = vec_list,
7169 .fno = gen_helper_sve2_shrnb_h,
7170 .vece = MO_16 },
7171 { .fni8 = gen_shrnb32_i64,
7172 .fniv = gen_shrnb_vec,
7173 .opt_opc = vec_list,
7174 .fno = gen_helper_sve2_shrnb_s,
7175 .vece = MO_32 },
7176 { .fni8 = gen_shrnb64_i64,
7177 .fniv = gen_shrnb_vec,
7178 .opt_opc = vec_list,
7179 .fno = gen_helper_sve2_shrnb_d,
7180 .vece = MO_64 },
7181 };
7182 return do_sve2_shr_narrow(s, a, ops);
7183}
7184
/*
 * SHRNT expansion on a 64-bit lane: left-shift by (halfbits - shr)
 * moves each narrowed result into the high half of its element;
 * clear the low halves of N, keep the low halves of D, and combine.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}
7195
7196static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7197{
7198 gen_shrnt_i64(MO_16, d, n, shr);
7199}
7200
7201static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7202{
7203 gen_shrnt_i64(MO_32, d, n, shr);
7204}
7205
7206static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7207{
7208 tcg_gen_shri_i64(n, n, shr);
7209 tcg_gen_deposit_i64(d, d, n, 32, 32);
7210}
7211
/*
 * SHRNT (vector expansion): the combined left shift by
 * (halfbits - shr) positions the narrowed result in the high half of
 * each element; bitsel keeps D's low halves and takes N's high halves.
 */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* bits in the narrowed half-element */
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    /* d = (d & mask) | (n & ~mask) */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7223
7224static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
7225{
7226 static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
7227 static const GVecGen2i ops[3] = {
7228 { .fni8 = gen_shrnt16_i64,
7229 .fniv = gen_shrnt_vec,
7230 .opt_opc = vec_list,
7231 .load_dest = true,
7232 .fno = gen_helper_sve2_shrnt_h,
7233 .vece = MO_16 },
7234 { .fni8 = gen_shrnt32_i64,
7235 .fniv = gen_shrnt_vec,
7236 .opt_opc = vec_list,
7237 .load_dest = true,
7238 .fno = gen_helper_sve2_shrnt_s,
7239 .vece = MO_32 },
7240 { .fni8 = gen_shrnt64_i64,
7241 .fniv = gen_shrnt_vec,
7242 .opt_opc = vec_list,
7243 .load_dest = true,
7244 .fno = gen_helper_sve2_shrnt_d,
7245 .vece = MO_64 },
7246 };
7247 return do_sve2_shr_narrow(s, a, ops);
7248}
7249
7250static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
7251{
7252 static const GVecGen2i ops[3] = {
7253 { .fno = gen_helper_sve2_rshrnb_h },
7254 { .fno = gen_helper_sve2_rshrnb_s },
7255 { .fno = gen_helper_sve2_rshrnb_d },
7256 };
7257 return do_sve2_shr_narrow(s, a, ops);
7258}
7259
7260static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
7261{
7262 static const GVecGen2i ops[3] = {
7263 { .fno = gen_helper_sve2_rshrnt_h },
7264 { .fno = gen_helper_sve2_rshrnt_s },
7265 { .fno = gen_helper_sve2_rshrnt_d },
7266 };
7267 return do_sve2_shr_narrow(s, a, ops);
81fd3e6e
RH
7268}
7269
/*
 * SQSHRUNB (vector expansion): arithmetic shift right, then saturate
 * to the unsigned half-width range -- smax clamps negatives to zero,
 * umin clamps the top and clears the high half of each element.
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* bits in the narrowed half-element */

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7283
7284static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
7285{
7286 static const TCGOpcode vec_list[] = {
7287 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7288 };
7289 static const GVecGen2i ops[3] = {
7290 { .fniv = gen_sqshrunb_vec,
7291 .opt_opc = vec_list,
7292 .fno = gen_helper_sve2_sqshrunb_h,
7293 .vece = MO_16 },
7294 { .fniv = gen_sqshrunb_vec,
7295 .opt_opc = vec_list,
7296 .fno = gen_helper_sve2_sqshrunb_s,
7297 .vece = MO_32 },
7298 { .fniv = gen_sqshrunb_vec,
7299 .opt_opc = vec_list,
7300 .fno = gen_helper_sve2_sqshrunb_d,
7301 .vece = MO_64 },
7302 };
7303 return do_sve2_shr_narrow(s, a, ops);
7304}
7305
7306static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
7307 TCGv_vec n, int64_t shr)
7308{
7309 TCGv_vec t = tcg_temp_new_vec_matching(d);
7310 int halfbits = 4 << vece;
7311
7312 tcg_gen_sari_vec(vece, n, n, shr);
7313 tcg_gen_dupi_vec(vece, t, 0);
7314 tcg_gen_smax_vec(vece, n, n, t);
7315 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7316 tcg_gen_umin_vec(vece, n, n, t);
7317 tcg_gen_shli_vec(vece, n, n, halfbits);
7318 tcg_gen_bitsel_vec(vece, d, t, d, n);
7319 tcg_temp_free_vec(t);
7320}
7321
7322static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
7323{
7324 static const TCGOpcode vec_list[] = {
7325 INDEX_op_shli_vec, INDEX_op_sari_vec,
7326 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7327 };
7328 static const GVecGen2i ops[3] = {
7329 { .fniv = gen_sqshrunt_vec,
7330 .opt_opc = vec_list,
7331 .load_dest = true,
7332 .fno = gen_helper_sve2_sqshrunt_h,
7333 .vece = MO_16 },
7334 { .fniv = gen_sqshrunt_vec,
7335 .opt_opc = vec_list,
7336 .load_dest = true,
7337 .fno = gen_helper_sve2_sqshrunt_s,
7338 .vece = MO_32 },
7339 { .fniv = gen_sqshrunt_vec,
7340 .opt_opc = vec_list,
7341 .load_dest = true,
7342 .fno = gen_helper_sve2_sqshrunt_d,
7343 .vece = MO_64 },
7344 };
7345 return do_sve2_shr_narrow(s, a, ops);
7346}
7347
7348static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
7349{
7350 static const GVecGen2i ops[3] = {
7351 { .fno = gen_helper_sve2_sqrshrunb_h },
7352 { .fno = gen_helper_sve2_sqrshrunb_s },
7353 { .fno = gen_helper_sve2_sqrshrunb_d },
7354 };
7355 return do_sve2_shr_narrow(s, a, ops);
7356}
7357
7358static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
7359{
7360 static const GVecGen2i ops[3] = {
7361 { .fno = gen_helper_sve2_sqrshrunt_h },
7362 { .fno = gen_helper_sve2_sqrshrunt_s },
7363 { .fno = gen_helper_sve2_sqrshrunt_d },
7364 };
7365 return do_sve2_shr_narrow(s, a, ops);
c13418da
RH
7366}
7367
743bb147
RH
/*
 * SQSHRNB (vector expansion): arithmetic shift right, clamp to the
 * signed half-width range [min, max], then keep only the low half of
 * each element.
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* bits in the narrowed half-element */
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    /* Clamp to [min, max]. */
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    /* Discard the high half of each element. */
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7385
7386static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
7387{
7388 static const TCGOpcode vec_list[] = {
7389 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7390 };
7391 static const GVecGen2i ops[3] = {
7392 { .fniv = gen_sqshrnb_vec,
7393 .opt_opc = vec_list,
7394 .fno = gen_helper_sve2_sqshrnb_h,
7395 .vece = MO_16 },
7396 { .fniv = gen_sqshrnb_vec,
7397 .opt_opc = vec_list,
7398 .fno = gen_helper_sve2_sqshrnb_s,
7399 .vece = MO_32 },
7400 { .fniv = gen_sqshrnb_vec,
7401 .opt_opc = vec_list,
7402 .fno = gen_helper_sve2_sqshrnb_d,
7403 .vece = MO_64 },
7404 };
7405 return do_sve2_shr_narrow(s, a, ops);
7406}
7407
7408static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
7409 TCGv_vec n, int64_t shr)
7410{
7411 TCGv_vec t = tcg_temp_new_vec_matching(d);
7412 int halfbits = 4 << vece;
7413 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7414 int64_t min = -max - 1;
7415
7416 tcg_gen_sari_vec(vece, n, n, shr);
7417 tcg_gen_dupi_vec(vece, t, min);
7418 tcg_gen_smax_vec(vece, n, n, t);
7419 tcg_gen_dupi_vec(vece, t, max);
7420 tcg_gen_smin_vec(vece, n, n, t);
7421 tcg_gen_shli_vec(vece, n, n, halfbits);
7422 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7423 tcg_gen_bitsel_vec(vece, d, t, d, n);
7424 tcg_temp_free_vec(t);
7425}
7426
7427static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
7428{
7429 static const TCGOpcode vec_list[] = {
7430 INDEX_op_shli_vec, INDEX_op_sari_vec,
7431 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7432 };
7433 static const GVecGen2i ops[3] = {
7434 { .fniv = gen_sqshrnt_vec,
7435 .opt_opc = vec_list,
7436 .load_dest = true,
7437 .fno = gen_helper_sve2_sqshrnt_h,
7438 .vece = MO_16 },
7439 { .fniv = gen_sqshrnt_vec,
7440 .opt_opc = vec_list,
7441 .load_dest = true,
7442 .fno = gen_helper_sve2_sqshrnt_s,
7443 .vece = MO_32 },
7444 { .fniv = gen_sqshrnt_vec,
7445 .opt_opc = vec_list,
7446 .load_dest = true,
7447 .fno = gen_helper_sve2_sqshrnt_d,
7448 .vece = MO_64 },
7449 };
7450 return do_sve2_shr_narrow(s, a, ops);
7451}
7452
7453static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
7454{
7455 static const GVecGen2i ops[3] = {
7456 { .fno = gen_helper_sve2_sqrshrnb_h },
7457 { .fno = gen_helper_sve2_sqrshrnb_s },
7458 { .fno = gen_helper_sve2_sqrshrnb_d },
7459 };
7460 return do_sve2_shr_narrow(s, a, ops);
7461}
7462
7463static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
7464{
7465 static const GVecGen2i ops[3] = {
7466 { .fno = gen_helper_sve2_sqrshrnt_h },
7467 { .fno = gen_helper_sve2_sqrshrnt_s },
7468 { .fno = gen_helper_sve2_sqrshrnt_d },
7469 };
7470 return do_sve2_shr_narrow(s, a, ops);
7471}
7472
c13418da
RH
/*
 * UQSHRNB (vector expansion): logical shift right, then unsigned
 * saturate to the half-width maximum; umin both clamps and clears the
 * high half of each element.
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* bits in the narrowed half-element */

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7484
7485static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
7486{
7487 static const TCGOpcode vec_list[] = {
7488 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7489 };
7490 static const GVecGen2i ops[3] = {
7491 { .fniv = gen_uqshrnb_vec,
7492 .opt_opc = vec_list,
7493 .fno = gen_helper_sve2_uqshrnb_h,
7494 .vece = MO_16 },
7495 { .fniv = gen_uqshrnb_vec,
7496 .opt_opc = vec_list,
7497 .fno = gen_helper_sve2_uqshrnb_s,
7498 .vece = MO_32 },
7499 { .fniv = gen_uqshrnb_vec,
7500 .opt_opc = vec_list,
7501 .fno = gen_helper_sve2_uqshrnb_d,
7502 .vece = MO_64 },
7503 };
7504 return do_sve2_shr_narrow(s, a, ops);
7505}
7506
7507static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
7508 TCGv_vec n, int64_t shr)
7509{
7510 TCGv_vec t = tcg_temp_new_vec_matching(d);
7511 int halfbits = 4 << vece;
7512
7513 tcg_gen_shri_vec(vece, n, n, shr);
7514 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7515 tcg_gen_umin_vec(vece, n, n, t);
7516 tcg_gen_shli_vec(vece, n, n, halfbits);
7517 tcg_gen_bitsel_vec(vece, d, t, d, n);
7518 tcg_temp_free_vec(t);
7519}
7520
7521static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
7522{
7523 static const TCGOpcode vec_list[] = {
7524 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7525 };
7526 static const GVecGen2i ops[3] = {
7527 { .fniv = gen_uqshrnt_vec,
7528 .opt_opc = vec_list,
7529 .load_dest = true,
7530 .fno = gen_helper_sve2_uqshrnt_h,
7531 .vece = MO_16 },
7532 { .fniv = gen_uqshrnt_vec,
7533 .opt_opc = vec_list,
7534 .load_dest = true,
7535 .fno = gen_helper_sve2_uqshrnt_s,
7536 .vece = MO_32 },
7537 { .fniv = gen_uqshrnt_vec,
7538 .opt_opc = vec_list,
7539 .load_dest = true,
7540 .fno = gen_helper_sve2_uqshrnt_d,
7541 .vece = MO_64 },
7542 };
7543 return do_sve2_shr_narrow(s, a, ops);
7544}
7545
7546static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
7547{
7548 static const GVecGen2i ops[3] = {
7549 { .fno = gen_helper_sve2_uqrshrnb_h },
7550 { .fno = gen_helper_sve2_uqrshrnb_s },
7551 { .fno = gen_helper_sve2_uqrshrnb_d },
7552 };
7553 return do_sve2_shr_narrow(s, a, ops);
7554}
7555
7556static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
7557{
7558 static const GVecGen2i ops[3] = {
7559 { .fno = gen_helper_sve2_uqrshrnt_h },
7560 { .fno = gen_helper_sve2_uqrshrnt_s },
7561 { .fno = gen_helper_sve2_uqrshrnt_d },
7562 };
7563 return do_sve2_shr_narrow(s, a, ops);
5ff2838d 7564}
b87dbeeb 7565
40d5ea50 7566#define DO_SVE2_ZZZ_NARROW(NAME, name) \
bd394cf5 7567 static gen_helper_gvec_3 * const name##_fns[4] = { \
40d5ea50
SL
7568 NULL, gen_helper_sve2_##name##_h, \
7569 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
7570 }; \
bd394cf5
RH
7571 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
7572 name##_fns[a->esz], a, 0)
40d5ea50
SL
7573
7574DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
7575DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
0ea3ff02
SL
7576DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
7577DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
40d5ea50 7578
c3cd6766
SL
7579DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
7580DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
e9443d10
SL
7581DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
7582DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
c3cd6766 7583
e0ae6ec3
SL
/*
 * Gate a predicated compare-to-flags expansion on the SVE2 feature,
 * then delegate to the generic SVE expander.
 */
static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                               gen_helper_gvec_flags_4 *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_ppzz_flags(s, a, fn);
}
7592
7593#define DO_SVE2_PPZZ_MATCH(NAME, name) \
7594static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
7595{ \
7596 static gen_helper_gvec_flags_4 * const fns[4] = { \
7597 gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h, \
7598 NULL, NULL \
7599 }; \
7600 return do_sve2_ppzz_flags(s, a, fns[a->esz]); \
7601}
7602
7603DO_SVE2_PPZZ_MATCH(MATCH, match)
7604DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
7605
5880bdc0
RH
7606static gen_helper_gvec_4 * const histcnt_fns[4] = {
7607 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
7608};
7609TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
7610 histcnt_fns[a->esz], a, 0)
7d47ac94 7611
bd394cf5
RH
7612TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
7613 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7d47ac94 7614
b87dbeeb
SL
/*
 * Gate a predicated FP pairwise expansion on the SVE2 feature, then
 * delegate to the generic SVE expander.
 */
static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                            gen_helper_gvec_4_ptr *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzz_fp(s, a, fn);
}
7623
7624#define DO_SVE2_ZPZZ_FP(NAME, name) \
7625static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
7626{ \
7627 static gen_helper_gvec_4_ptr * const fns[4] = { \
7628 NULL, gen_helper_sve2_##name##_zpzz_h, \
7629 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d \
7630 }; \
7631 return do_sve2_zpzz_fp(s, a, fns[a->esz]); \
7632}
7633
7634DO_SVE2_ZPZZ_FP(FADDP, faddp)
7635DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
7636DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
7637DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
7638DO_SVE2_ZPZZ_FP(FMINP, fminp)
bfc9307e
RH
7639
7640/*
7641 * SVE Integer Multiply-Add (unpredicated)
7642 */
7643
4f26756b
SL
/*
 * FMMLA: floating-point matrix multiply-accumulate.  The single- and
 * double-precision forms are gated by separate features (FEAT_F32MM
 * and FEAT_F64MM); other element sizes do not exist.
 */
static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    gen_helper_gvec_4_ptr *fn;

    switch (a->esz) {
    case MO_32:
        if (!dc_isar_feature(aa64_sve_f32mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_s;
        break;
    case MO_64:
        if (!dc_isar_feature(aa64_sve_f64mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_d;
        break;
    default:
        return false;
    }

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
7677
eeb4e84d
RH
7678static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
7679 NULL, gen_helper_sve2_sqdmlal_zzzw_h,
7680 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
7681};
7682TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7683 sqdmlal_zzzw_fns[a->esz], a, 0)
7684TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7685 sqdmlal_zzzw_fns[a->esz], a, 3)
7686TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7687 sqdmlal_zzzw_fns[a->esz], a, 2)
7688
7689static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
7690 NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
7691 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
7692};
7693TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7694 sqdmlsl_zzzw_fns[a->esz], a, 0)
7695TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7696 sqdmlsl_zzzw_fns[a->esz], a, 3)
7697TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7698 sqdmlsl_zzzw_fns[a->esz], a, 2)
7699
7700static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
7701 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
7702 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
7703};
7704TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7705 sqrdmlah_fns[a->esz], a, 0)
45a32e80 7706
eeb4e84d
RH
7707static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
7708 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
7709 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
7710};
7711TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7712 sqrdmlsh_fns[a->esz], a, 0)
45a32e80 7713
eeb4e84d
RH
7714static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
7715 NULL, gen_helper_sve2_smlal_zzzw_h,
7716 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
7717};
7718TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7719 smlal_zzzw_fns[a->esz], a, 0)
7720TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7721 smlal_zzzw_fns[a->esz], a, 1)
7722
7723static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
7724 NULL, gen_helper_sve2_umlal_zzzw_h,
7725 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
7726};
7727TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7728 umlal_zzzw_fns[a->esz], a, 0)
7729TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7730 umlal_zzzw_fns[a->esz], a, 1)
7731
7732static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
7733 NULL, gen_helper_sve2_smlsl_zzzw_h,
7734 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
7735};
7736TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7737 smlsl_zzzw_fns[a->esz], a, 0)
7738TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7739 smlsl_zzzw_fns[a->esz], a, 1)
7740
7741static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
7742 NULL, gen_helper_sve2_umlsl_zzzw_h,
7743 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
7744};
7745TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7746 umlsl_zzzw_fns[a->esz], a, 0)
7747TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7748 umlsl_zzzw_fns[a->esz], a, 1)
d782d3ca 7749
5f425b92
RH
7750static gen_helper_gvec_4 * const cmla_fns[] = {
7751 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
7752 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
7753};
7754TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7755 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
21068f39 7756
5f425b92
RH
7757static gen_helper_gvec_4 * const cdot_fns[] = {
7758 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
7759};
7760TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7761 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
d782d3ca 7762
5f425b92
RH
7763static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
7764 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
7765 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
7766};
7767TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7768 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
6a98cb2a 7769
8740d694
RH
7770TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7771 a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
b2bcd1be 7772
0ea3cdbf
RH
7773TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
7774 gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
3cc7a88e 7775
32e2ad65
RH
7776TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7777 gen_helper_crypto_aese, a, false)
7778TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7779 gen_helper_crypto_aese, a, true)
3cc7a88e 7780
32e2ad65
RH
7781TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7782 gen_helper_crypto_sm4e, a, 0)
7783TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7784 gen_helper_crypto_sm4ekey, a, 0)
3358eb3f 7785
2aa469ff 7786TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
5c1b7226
RH
7787
7788static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
7789{
7790 if (!dc_isar_feature(aa64_sve2, s)) {
7791 return false;
7792 }
7793 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
7794}
7795
d29b17ca
RH
7796static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
7797{
7798 if (!dc_isar_feature(aa64_sve_bf16, s)) {
7799 return false;
7800 }
7801 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
7802}
7803
5c1b7226
RH
7804static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
7805{
7806 if (!dc_isar_feature(aa64_sve2, s)) {
7807 return false;
7808 }
7809 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
7810}
83c2523f
SL
7811
7812static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
7813{
7814 if (!dc_isar_feature(aa64_sve2, s)) {
7815 return false;
7816 }
7817 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
7818}
7819
7820static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
7821{
7822 if (!dc_isar_feature(aa64_sve2, s)) {
7823 return false;
7824 }
7825 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
7826}
95365277
SL
7827
7828static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
7829{
7830 if (!dc_isar_feature(aa64_sve2, s)) {
7831 return false;
7832 }
7833 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
7834}
7835
7836static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
7837{
7838 if (!dc_isar_feature(aa64_sve2, s)) {
7839 return false;
7840 }
7841 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
7842}
631be02e
SL
7843
/*
 * FLOGB: floating-point base-2 logarithm to integer.  No byte-sized
 * form exists (fns[0] is NULL); half-precision uses the FPCR.FZ16
 * status flags.
 */
static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        NULL,               gen_helper_flogb_h,
        gen_helper_flogb_s, gen_helper_flogb_d
    };

    if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
50d102bd
SL
7867
/*
 * FMLAL/FMLSL (vectors): widening half-to-single FP multiply-add.
 * 'sel' selects the top (true) or bottom (false) half-elements of the
 * sources; 'sub' selects subtraction (FMLSL).  Both are packed into
 * the simd_data field for the single shared helper.
 */
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           cpu_env, vsz, vsz, (sel << 1) | sub,
                           gen_helper_sve2_fmlal_zzzw_s);
    }
    return true;
}
7884
7885static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7886{
7887 return do_FMLAL_zzzw(s, a, false, false);
7888}
7889
7890static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7891{
7892 return do_FMLAL_zzzw(s, a, false, true);
7893}
7894
7895static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7896{
7897 return do_FMLAL_zzzw(s, a, true, false);
7898}
7899
7900static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7901{
7902 return do_FMLAL_zzzw(s, a, true, true);
7903}
7904
7905static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
7906{
7907 if (!dc_isar_feature(aa64_sve2, s)) {
7908 return false;
7909 }
7910 if (sve_access_check(s)) {
7911 unsigned vsz = vec_full_reg_size(s);
7912 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
7913 vec_full_reg_offset(s, a->rn),
7914 vec_full_reg_offset(s, a->rm),
7915 vec_full_reg_offset(s, a->ra),
7916 cpu_env, vsz, vsz,
7917 (a->index << 2) | (sel << 1) | sub,
7918 gen_helper_sve2_fmlal_zzxw_s);
7919 }
7920 return true;
7921}
7922
/* FMLALB (indexed): add form, bottom (even) elements — sub=0, sel=0.  */
static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, false);
}

/* FMLALT (indexed): add form, top (odd) elements — sub=0, sel=1.  */
static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, true);
}

/* FMLSLB (indexed): subtract form, bottom elements — sub=1, sel=0.  */
static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, false);
}

/* FMLSLT (indexed): subtract form, top elements — sub=1, sel=1.  */
static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, true);
}
2323c5ff 7942
eec05e4e
RH
/* Integer matrix multiply-accumulate, gated on the I8MM feature bit.  */
TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_ummla_b, a, 0)
cb8657f7 7949
eec05e4e
RH
/* BFloat16 dot product (vectors), gated on the BF16 feature bit.  */
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
f3500a25
RH
/* BFloat16 dot product (indexed), gated on the BF16 feature bit.  */
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)
81266a1f 7954
eec05e4e
RH
/* BFloat16 matrix multiply-accumulate, gated on the BF16 feature bit.  */
TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfmmla, a, 0)
5693887f
RH
7957
7958static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7959{
7960 if (!dc_isar_feature(aa64_sve_bf16, s)) {
7961 return false;
7962 }
7963 if (sve_access_check(s)) {
7964 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
7965 unsigned vsz = vec_full_reg_size(s);
7966
7967 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
7968 vec_full_reg_offset(s, a->rn),
7969 vec_full_reg_offset(s, a->rm),
7970 vec_full_reg_offset(s, a->ra),
7971 status, vsz, vsz, sel,
7972 gen_helper_gvec_bfmlal);
7973 tcg_temp_free_ptr(status);
7974 }
7975 return true;
7976}
7977
/* BFMLALB (vectors): bottom (even) elements — sel=0.  */
static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, false);
}

/* BFMLALT (vectors): top (odd) elements — sel=1.  */
static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, true);
}
458d0ab6
RH
7987
7988static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
7989{
7990 if (!dc_isar_feature(aa64_sve_bf16, s)) {
7991 return false;
7992 }
7993 if (sve_access_check(s)) {
7994 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
7995 unsigned vsz = vec_full_reg_size(s);
7996
7997 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
7998 vec_full_reg_offset(s, a->rn),
7999 vec_full_reg_offset(s, a->rm),
8000 vec_full_reg_offset(s, a->ra),
8001 status, vsz, vsz, (a->index << 1) | sel,
8002 gen_helper_gvec_bfmlal_idx);
8003 tcg_temp_free_ptr(status);
8004 }
8005 return true;
8006}
8007
/* BFMLALB (indexed): bottom (even) elements — sel=0.  */
static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, false);
}

/* BFMLALT (indexed): top (odd) elements — sel=1.  */
static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, true);
}