]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Introduce gen_gvec_ool_arg_zpz
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3; /* discard imm3 */
    return 31 - clz32(x);
}

/* Right-shift amount from the tsz:imm3 encoding: (2 * esize) - x. */
static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

/* As expand_imm_sh8s, but treating the low 8 bits as unsigned. */
static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

/* Return pred_full_reg_size rounded up for the tcg vector infrastructure. */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
138
/* Invoke an out-of-line helper on 2 Zregs.
 * A NULL fn marks an element size without an implementation;
 * return false so the insn is diagnosed as unallocated.
 */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* Invoke an out-of-line helper on 3 Zregs.  NULL fn => unallocated. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* As gen_gvec_ool_zzz, taking the registers from decoded arguments. */
static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}

/* Invoke an out-of-line helper on 4 Zregs.  NULL fn => unallocated. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* As gen_gvec_ool_zzzz, taking the registers from decoded arguments. */
static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

/* As gen_gvec_ool_zzzz, for indexed operations: data is the index. */
static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}
207
/* Invoke an out-of-line helper on 2 Zregs and a predicate.
 * NULL fn => unallocated encoding.
 */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* As gen_gvec_ool_zzp, taking the registers from decoded arguments. */
static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}


/* Invoke an out-of-line helper on 3 Zregs and a predicate.
 * Note: unlike the helpers above, the caller has already performed
 * sve_access_check and fn must be non-NULL.
 */
static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, data, fn);
}
f7d79c41 243
/* Invoke a vector expander on two Zregs. */
static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
                           int esz, int rd, int rn)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn), vsz, vsz);
}

/* Invoke a vector expander on three Zregs. */
static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vsz, vsz);
}

/* Invoke a vector expander on four Zregs. */
static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                             int esz, int rd, int rn, int rm, int ra)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm),
            vec_full_reg_offset(s, ra), vsz, vsz);
}
273
39eea561
RH
/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs.
 * Operates on the rounded-up (gvec) predicate size; see size_for_gvec.
 */
static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    unsigned psz = pred_gvec_reg_size(s);
    gvec_fn(MO_64, pred_full_reg_offset(s, rd),
            pred_full_reg_offset(s, rn),
            pred_full_reg_offset(s, rm), psz, psz);
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}
310
9e18d7a6
RH
/* Set the cpu flags as per a return from an SVE helper.
 * The helper result encodes N in the sign bit, Z in bit 1, C in bit 0;
 * V is always cleared.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */

/* PredTest for a single predicate word. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* PredTest over 'words' predicate words at env offsets dofs (data)
 * and gofs (governing predicate).
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
352
39eea561
RH
353/*
354 *** SVE Logical - Unpredicated Group
355 */
356
28c4da31
RH
357static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
358{
359 if (sve_access_check(s)) {
360 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
361 }
362 return true;
363}
364
3a7be554 365static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 366{
28c4da31 367 return do_zzz_fn(s, a, tcg_gen_gvec_and);
39eea561
RH
368}
369
3a7be554 370static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 371{
28c4da31 372 return do_zzz_fn(s, a, tcg_gen_gvec_or);
39eea561
RH
373}
374
3a7be554 375static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 376{
28c4da31 377 return do_zzz_fn(s, a, tcg_gen_gvec_xor);
39eea561
RH
378}
379
3a7be554 380static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
38388f7e 381{
28c4da31 382 return do_zzz_fn(s, a, tcg_gen_gvec_andc);
38388f7e 383}
d1822297 384
e6eba6e5
RH
/* XOR n with m, then rotate each byte right by sh, on a 64-bit lane. */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    /* Keep only bits that stay within each byte after the two shifts. */
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

/* As gen_xar8_i64, but rotating each 16-bit element. */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

/* 32-bit elements: a native rotate suffices. */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

/* 64-bit elements: a native rotate suffices. */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

/* Vector form, any element size, using the rotri vector op. */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
431
/* Expand XAR (xor and rotate right) for all element sizes.
 * Shared with AdvSIMD, hence the external linkage.
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
474
/* XAR (SVE2): esz < 0 marks an invalid tsz encoding -- see tszimm_esz. */
static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}

/* Expand an SVE2 four-Zreg operation via a vector expander. */
static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
    }
    return true;
}
499
/* EOR3: d = n ^ m ^ k, on a 64-bit lane. */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

/* Expand EOR3 as a gvec operation (bitwise, so vece is fixed at MO_64). */
static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_eor3);
}

/* BCAX: d = n ^ (m & ~k), on a 64-bit lane. */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

/* Expand BCAX as a gvec operation. */
static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bcax);
}

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl);
}
573
/* BSL1N: d = (~n & k) | (m & ~k), i.e. bitsel with first operand inverted.
 * Note that n and m are clobbered as scratch.
 */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

/* Expand BSL1N as a gvec operation. */
static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl1n);
}

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

/* Expand BSL2N as a gvec operation. */
static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl2n);
}
658
/* NBSL: d = ~((n & k) | (m & ~k)), i.e. inverted bitsel.
 * Note that n and m are clobbered as scratch.
 */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

/* Expand NBSL as a gvec operation. */
static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_nbsl);
}
690
fea98f9c
RH
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_add);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}

/* Signed saturating add. */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}

/* Signed saturating subtract. */
static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}

/* Unsigned saturating add. */
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}

/* Unsigned saturating subtract. */
static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}
724
f97cfd59
RH
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Expand a predicated two-source operation out of line.
 * NULL fn => unallocated element size.
 */
static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
    }
    return true;
}

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
751
/* Expand trans_<NAME>_zpzz as a table of per-esz helpers fed to do_zpzz_ool. */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

/* SDIV/UDIV are only defined for 32- and 64-bit elements. */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ
810
afac6d04
RH
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

/* Expand trans_<NAME> as a table of per-esz helpers fed to
 * gen_gvec_ool_arg_zpz.
 */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return gen_gvec_ool_arg_zpz(s, fns[a->esz], a, 0);              \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

/* FABS is not defined for byte elements. */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return gen_gvec_ool_arg_zpz(s, fns[a->esz], a, 0);
}

/* FNEG is not defined for byte elements. */
static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return gen_gvec_ool_arg_zpz(s, fns[a->esz], a, 0);
}

/* Sign/zero extensions: only element sizes wider than the source
 * width are valid; the rest are NULL (unallocated).
 */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return gen_gvec_ool_arg_zpz(s, fns[a->esz], a, 0);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return gen_gvec_ool_arg_zpz(s, fns[a->esz], a, 0);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return gen_gvec_ool_arg_zpz(s, fns[a->esz], a, 0);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return gen_gvec_ool_arg_zpz(s, fns[a->esz], a, 0);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return gen_gvec_ool_arg_zpz(s, a->esz == 3 ? gen_helper_sve_sxtw_d
                                : NULL, a, 0);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return gen_gvec_ool_arg_zpz(s, a->esz == 3 ? gen_helper_sve_uxtw_d
                                : NULL, a, 0);
}

#undef DO_ZPZ
910
047cec97
RH
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Reduce Zn under predicate Pg into a scalar, written to Vd/Dd.
 * NULL fn => unallocated element size.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

/* Expand trans_<NAME> as a table of per-esz reduction helpers. */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)            \
{                                                                    \
    static gen_helper_gvec_reduc * const fns[4] = {                  \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_vpz_ool(s, a, fns[a->esz]);                            \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV is not defined for 64-bit elements. */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
977
ccd841c3
RH
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}

/* Expand a predicated shift by immediate, passing a->imm as helper data. */
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
1071
a5421b54
SL
1072static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1073{
1074 static gen_helper_gvec_3 * const fns[4] = {
1075 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
1076 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
1077 };
1078 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1079 return false;
1080 }
1081 return do_zpzi_ool(s, a, fns[a->esz]);
1082}
1083
1084static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1085{
1086 static gen_helper_gvec_3 * const fns[4] = {
1087 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
1088 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
1089 };
1090 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1091 return false;
1092 }
1093 return do_zpzi_ool(s, a, fns[a->esz]);
1094}
1095
1096static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a)
1097{
1098 static gen_helper_gvec_3 * const fns[4] = {
1099 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
1100 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
1101 };
1102 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1103 return false;
1104 }
1105 return do_zpzi_ool(s, a, fns[a->esz]);
1106}
1107
1108static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a)
1109{
1110 static gen_helper_gvec_3 * const fns[4] = {
1111 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
1112 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
1113 };
1114 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1115 return false;
1116 }
1117 return do_zpzi_ool(s, a, fns[a->esz]);
1118}
1119
1120static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a)
1121{
1122 static gen_helper_gvec_3 * const fns[4] = {
1123 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
1124 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
1125 };
1126 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1127 return false;
1128 }
1129 return do_zpzi_ool(s, a, fns[a->esz]);
1130}
1131
fe7f8dfb
RH
1132/*
1133 *** SVE Bitwise Shift - Predicated Group
1134 */
1135
/*
 * Expand one predicated shift-by-wide-elements insn (Zd.T, Pg, Zn.T, Zm.D).
 * Only B/H/S element sizes exist; the D size is invalid for these encodings.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1154
d9d78dcc
RH
1155/*
1156 *** SVE Bitwise Shift - Unpredicated Group
1157 */
1158
1159static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
1160 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
1161 int64_t, uint32_t, uint32_t))
1162{
1163 if (a->esz < 0) {
1164 /* Invalid tsz encoding -- see tszimm_esz. */
1165 return false;
1166 }
1167 if (sve_access_check(s)) {
1168 unsigned vsz = vec_full_reg_size(s);
1169 /* Shift by element size is architecturally valid. For
1170 arithmetic right-shift, it's the same as by one less.
1171 Otherwise it is a zeroing operation. */
1172 if (a->imm >= 8 << a->esz) {
1173 if (asr) {
1174 a->imm = (8 << a->esz) - 1;
1175 } else {
1176 do_dupi_z(s, a->rd, 0);
1177 return true;
1178 }
1179 }
1180 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
1181 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
1182 }
1183 return true;
1184}
1185
/* Unpredicated arithmetic shift right by immediate. */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

/* Unpredicated logical shift right by immediate. */
static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

/* Unpredicated logical shift left by immediate. */
static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
1200
/*
 * Expand one unpredicated shift-by-wide-elements insn (Zd.T, Zn.T, Zm.D).
 * The D element size has no wide variant, hence the NULL table slot.
 */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {            \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
        gen_helper_sve_##name##_zzw_s, NULL                           \
    };                                                                \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                  \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1214
96a36e4a
RH
1215/*
1216 *** SVE Integer Multiply-Add Group
1217 */
1218
1219static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1220 gen_helper_gvec_5 *fn)
1221{
1222 if (sve_access_check(s)) {
1223 unsigned vsz = vec_full_reg_size(s);
1224 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1225 vec_full_reg_offset(s, a->ra),
1226 vec_full_reg_offset(s, a->rn),
1227 vec_full_reg_offset(s, a->rm),
1228 pred_full_reg_offset(s, a->pg),
1229 vsz, vsz, 0, fn);
1230 }
1231 return true;
1232}
1233
/* Expand one predicated multiply-accumulate insn for all four element sizes. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
1248
9a56c9c3
RH
1249/*
1250 *** SVE Index Generation Group
1251 */
1252
/*
 * Generate INDEX: fill vector rd with start + i * incr for each element i.
 * The 64-bit helper takes the operands as-is; the narrower helpers take
 * 32-bit operands, so truncate first.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* Narrow the 64-bit inputs for the B/H/S helpers. */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
}
1282
3a7be554 1283static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
9a56c9c3
RH
1284{
1285 if (sve_access_check(s)) {
b0c3aece
RH
1286 TCGv_i64 start = tcg_constant_i64(a->imm1);
1287 TCGv_i64 incr = tcg_constant_i64(a->imm2);
9a56c9c3 1288 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1289 }
1290 return true;
1291}
1292
3a7be554 1293static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
9a56c9c3
RH
1294{
1295 if (sve_access_check(s)) {
b0c3aece 1296 TCGv_i64 start = tcg_constant_i64(a->imm);
9a56c9c3
RH
1297 TCGv_i64 incr = cpu_reg(s, a->rm);
1298 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1299 }
1300 return true;
1301}
1302
3a7be554 1303static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
9a56c9c3
RH
1304{
1305 if (sve_access_check(s)) {
1306 TCGv_i64 start = cpu_reg(s, a->rn);
b0c3aece 1307 TCGv_i64 incr = tcg_constant_i64(a->imm);
9a56c9c3 1308 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1309 }
1310 return true;
1311}
1312
3a7be554 1313static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
9a56c9c3
RH
1314{
1315 if (sve_access_check(s)) {
1316 TCGv_i64 start = cpu_reg(s, a->rn);
1317 TCGv_i64 incr = cpu_reg(s, a->rm);
1318 do_index(s, a->esz, a->rd, start, incr);
1319 }
1320 return true;
1321}
1322
96f922cc
RH
1323/*
1324 *** SVE Stack Allocation Group
1325 */
1326
3a7be554 1327static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1328{
5de56742
AC
1329 if (sve_access_check(s)) {
1330 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1331 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1332 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1333 }
96f922cc
RH
1334 return true;
1335}
1336
3a7be554 1337static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1338{
5de56742
AC
1339 if (sve_access_check(s)) {
1340 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1341 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1342 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1343 }
96f922cc
RH
1344 return true;
1345}
1346
3a7be554 1347static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1348{
5de56742
AC
1349 if (sve_access_check(s)) {
1350 TCGv_i64 reg = cpu_reg(s, a->rd);
1351 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1352 }
96f922cc
RH
1353 return true;
1354}
1355
4b242d9c
RH
1356/*
1357 *** SVE Compute Vector Address Group
1358 */
1359
/* Expand an ADR (vector address generation) insn via its ool helper. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}
1364
/* ADR with packed 32-bit offsets. */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

/* ADR with packed 64-bit offsets. */
static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

/* ADR with sign-extended 32-bit offsets in 64-bit elements. */
static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

/* ADR with zero-extended 32-bit offsets in 64-bit elements. */
static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1384
0762cd42
RH
1385/*
1386 *** SVE Integer Misc - Unpredicated Group
1387 */
1388
0ea3cdbf
RH
/* FEXPA: floating-point exponential accelerator.  No byte-size variant. */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)
0762cd42 1395
32e2ad65
RH
/* FTSSEL: floating-point trig select coefficient.  No byte-size variant. */
static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
a1f233f2 1401
516e246a
RH
1402/*
1403 *** SVE Predicate Logical Operations Group
1404 */
1405
/*
 * Expand a predicate logical operation (Pd, Pg, Pn, Pm), optionally
 * setting NZCV from the result (the setflags forms).  Without flags,
 * simply emit the supplied gvec operation.  With flags, either do the
 * whole computation inline in i64 temps (when the predicate fits in
 * one 64-bit word), or emit the operation then run PTEST over it.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1461
/* Pd = Pn & Pm & Pg, one 64-bit predicate word at a time. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of the above. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* AND (predicates); special cases collapse to moves or two-input ANDs. */
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            /* Pn & Pn & Pg == Pn & Pg; with Pg == Pn it is just Pn. */
            if (a->pg == a->rn) {
                do_mov_p(s, a->rd, a->rn);
            } else {
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            /* The guard is one of the operands; one AND suffices. */
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
1502
/* Pd = (Pn & ~Pm) & Pg, one 64-bit predicate word at a time. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of the above. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* BIC (predicates); Pg == Pn without flags needs only one ANDC. */
static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}
1533
/* Pd = (Pn ^ Pm) & Pg, one 64-bit predicate word at a time. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of the above. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* EOR (predicates). */
static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1557
3a7be554 1558static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1559{
516e246a
RH
1560 if (a->s) {
1561 return false;
516e246a 1562 }
d4bc6232
RH
1563 if (sve_access_check(s)) {
1564 unsigned psz = pred_gvec_reg_size(s);
1565 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1566 pred_full_reg_offset(s, a->pg),
1567 pred_full_reg_offset(s, a->rn),
1568 pred_full_reg_offset(s, a->rm), psz, psz);
1569 }
1570 return true;
516e246a
RH
1571}
1572
/* Pd = (Pn | Pm) & Pg, one 64-bit predicate word at a time. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of the above. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* ORR (predicates); with all three sources equal it is a plain move. */
static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1600
/* Pd = (Pn | ~Pm) & Pg, one 64-bit predicate word at a time. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of the above. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* ORN (predicates). */
static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1624
/* Pd = ~(Pn | Pm) & Pg, one 64-bit predicate word at a time. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Vector form of the above. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

/* NOR (predicates). */
static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1648
/* Pd = ~(Pn & Pm) & Pg, one 64-bit predicate word at a time. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Vector form of the above. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

/* NAND (predicates). */
static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1672
9e18d7a6
RH
1673/*
1674 *** SVE Predicate Misc Group
1675 */
1676
/*
 * PTEST: set NZCV from the predicate Pn under governing predicate Pg.
 * A single 64-bit word is handled inline; wider predicates go through
 * the generic do_predtest expansion.
 */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1700
028e2a7b
RH
/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned limit;

    switch (pattern) {
    case 0x0: /* POW2 */
        {
            /* Largest power of two <= elements (inline pow2floor). */
            unsigned p = elements;
            p |= p >> 1;
            p |= p >> 2;
            p |= p >> 4;
            p |= p >> 8;
            p |= p >> 16;
            return p - (p >> 1);
        }
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        limit = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        limit = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default: /* #uimm5 */
        return 0;
    }
    /* Fixed-VL patterns yield 0 when the vector is too short. */
    return elements >= limit ? limit : 0;
}
1738
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Trim the final partially-set 64-bit word. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate is one word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* Uniform contents: try a single gvec dup over the set region. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Store full words, then the trailing partial word, then zeros. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1818
/* PTRUE, PTRUES: initialize Pd from the pattern, optionally setting flags. */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

/* SETFFR: set the first-fault register to all true. */
static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

/* PFALSE: clear Pd. */
static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}
1835
/* RDFFR (predicated): Pd = FFR & Pg, optionally setting flags. */
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR (unpredicated): copy FFR to Pd. */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

/* WRFFR: copy Pn to FFR. */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1857
/*
 * Common expansion for PFIRST and PNEXT: call the helper with the
 * destination/input predicate and the governing predicate, then set
 * NZCV from the helper's return value.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    /* Pack predicate size and element size into the descriptor. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    /* The helper returns the new NZCV value. */
    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
1886
/* PFIRST: set the first active element of Pd true. */
static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

/* PNEXT: advance to the next active element of Pd. */
static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1896
24e82e68
RH
1897/*
1898 *** SVE Element Count Group
1899 */
1900
/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisions we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtracting a positive value can only underflow; clamp below. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Adding a positive value can only overflow; clamp above. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1925
/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: select 0 on borrow (reg < val). */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: select UINT64_MAX on carry (sum < reg). */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1971
/* Similarly with a vector and a scalar operand.
 * The B/H helpers take a 32-bit operand and a negation flag folded in
 * by negating the operand; the S/D helpers take a 64-bit operand, with
 * separate add/sub entry points only at the D size.
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            /* Subtraction is addition of the negated operand. */
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                /* Unsigned 64-bit subtract has a dedicated helper. */
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
2055
3a7be554 2056static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
2057{
2058 if (sve_access_check(s)) {
2059 unsigned fullsz = vec_full_reg_size(s);
2060 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2061 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2062 }
2063 return true;
2064}
2065
3a7be554 2066static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2067{
2068 if (sve_access_check(s)) {
2069 unsigned fullsz = vec_full_reg_size(s);
2070 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2071 int inc = numelem * a->imm * (a->d ? -1 : 1);
2072 TCGv_i64 reg = cpu_reg(s, a->rd);
2073
2074 tcg_gen_addi_i64(reg, reg, inc);
2075 }
2076 return true;
2077}
2078
3a7be554 2079static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2080{
2081 if (!sve_access_check(s)) {
2082 return true;
2083 }
2084
2085 unsigned fullsz = vec_full_reg_size(s);
2086 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2087 int inc = numelem * a->imm;
2088 TCGv_i64 reg = cpu_reg(s, a->rd);
2089
2090 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
2091 if (inc == 0) {
2092 if (a->u) {
2093 tcg_gen_ext32u_i64(reg, reg);
2094 } else {
2095 tcg_gen_ext32s_i64(reg, reg);
2096 }
2097 } else {
d681f125 2098 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2099 }
2100 return true;
2101}
2102
3a7be554 2103static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2104{
2105 if (!sve_access_check(s)) {
2106 return true;
2107 }
2108
2109 unsigned fullsz = vec_full_reg_size(s);
2110 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2111 int inc = numelem * a->imm;
2112 TCGv_i64 reg = cpu_reg(s, a->rd);
2113
2114 if (inc != 0) {
d681f125 2115 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2116 }
2117 return true;
2118}
2119
/*
 * INCH/DECH etc (vector): add or subtract the scaled element count
 * from each element of Zd.  The byte element size is not encodable.
 */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        /* Zero increment degenerates to a plain register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
2142
/*
 * SQINC/UQINC/SQDEC/UQDEC (vector): saturating add or subtract of a
 * scaled element count to every element of Zd.  esz==0 is unallocated.
 */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        /* Zero step degenerates to a plain register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
2163
e1fa1164
RH
2164/*
2165 *** SVE Bitwise Immediate Group
2166 */
2167
/*
 * Common expansion for AND/ORR/EOR with a logical-immediate (dbm-encoded)
 * bitmask.  Returns false (unallocated) when the immediate does not decode.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The decoded mask is a 64-bit pattern, hence MO_64. */
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}
2183
/* AND (immediate): Zd = Zn & dbm-decoded mask. */
static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

/* ORR (immediate): Zd = Zn | dbm-decoded mask. */
static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

/* EOR (immediate): Zd = Zn ^ dbm-decoded mask. */
static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}
2198
/* DUPM: broadcast a dbm-encoded logical immediate to every element of Zd. */
static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
2212
f25a2361
RH
2213/*
2214 *** SVE Integer Wide Immediate - Predicated Group
2215 */
2216
/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 *
 * Copies VAL into the active elements of zd under predicate pg,
 * preserving inactive elements from zn, via an out-of-line helper
 * selected by element size.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    /* Pass env-relative pointers to the vector and predicate registers. */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
}
2244
/* FCPY: merging copy of an expanded VFP immediate.  esz==0 is unallocated. */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}
2257
/*
 * CPY (immediate, merging).  For esz==0 the sh bit (insn bit 13,
 * requesting a shifted immediate) is unallocated.
 */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    }
    return true;
}
2268
/*
 * CPY (immediate, zeroing): active elements take the immediate,
 * inactive elements are zeroed.  As with the merging form, esz==0
 * with the shift bit set (insn bit 13) is unallocated.
 */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2288
b94f8f60
RH
2289/*
2290 *** SVE Permute Extract Group
2291 */
2292
/*
 * EXT: concatenate ZN:ZM and extract a vector-length window starting
 * at byte IMM.  An out-of-range IMM degenerates to a copy of ZN.
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;   /* bytes taken from ZN's tail */
    unsigned n_siz = vsz - n_ofs;            /* bytes of ZN contributed */
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper for awkward cases. */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2322
75114792
SL
/* EXT (SVE1 form): explicit Zn and Zm operands. */
static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
}

/* EXT (SVE2 constructive form): second source is the register pair Zn+1. */
static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
}
2335
30562ab7
RH
2336/*
2337 *** SVE Permute - Unpredicated Group
2338 */
2339
3a7be554 2340static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2341{
2342 if (sve_access_check(s)) {
2343 unsigned vsz = vec_full_reg_size(s);
2344 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2345 vsz, vsz, cpu_reg_sp(s, a->rn));
2346 }
2347 return true;
2348}
2349
/*
 * DUP (indexed): broadcast element [index] of Zn to all of Zd.
 * The imm field encodes both element size (trailing-zero count) and
 * index; all-zero low bits are unallocated.  An out-of-range index
 * architecturally yields zero.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2376
/*
 * INSR expansion: shift Zn up by one element and insert VAL at
 * element zero, via an out-of-line helper chosen by element size.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
}
2397
/* INSR (SIMD&FP scalar): insert the low 64 bits of Vm. */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* INSR (scalar): insert general register Xm. */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
2416
0ea3cdbf
RH
/* REV (vector): reverse the order of all elements. */
static gen_helper_gvec_2 * const rev_fns[4] = {
    gen_helper_sve_rev_b, gen_helper_sve_rev_h,
    gen_helper_sve_rev_s, gen_helper_sve_rev_d
};
TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)

/* TBL: table lookup, single source register. */
static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
    gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
    gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
};
TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)

/* TBL (SVE2): table lookup across the register pair Zn:Zn+1. */
static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
    gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
    gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
};
TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
           a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)

/* TBX (SVE2): table lookup, out-of-range indices leave Zd unchanged. */
static gen_helper_gvec_3 * const tbx_fns[4] = {
    gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
    gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
};
TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
80a712a2 2441
/*
 * SUNPKLO/HI, UUNPKLO/HI: widen the low (h=0) or high (h=1) half of Zn
 * into full-width elements of Zd.  esz here is the destination size,
 * so esz==0 (no narrower source exists) is unallocated.
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* For the high half, bias the source offset by half a vector. */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2463
d731d8cb
RH
2464/*
2465 *** SVE Permute - Predicates Group
2466 */
2467
/*
 * Common expansion for three-operand predicate permutes (ZIP/UZP/TRN).
 * Predicate sizes may be smaller than a gvec granule, so a PREDDESC
 * descriptor is built by hand instead of using simd_desc; high_odd
 * selects the odd/high half variant via the DATA field.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    uint32_t desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));

    fn(t_d, t_n, t_m, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    return true;
}
2497
/*
 * Common expansion for two-operand predicate permutes (REV_p, PUNPK).
 * As with do_perm_pred3, a hand-built PREDDESC descriptor is used.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(t_d, t_n, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2523
/* ZIP1/ZIP2 (predicates): interleave low/high halves of Pn and Pm. */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

/* UZP1/UZP2 (predicates): concatenate even/odd elements of Pn and Pm. */
static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

/* TRN1/TRN2 (predicates): interleave even/odd element pairs. */
static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

/* REV (predicate): reverse all elements of Pn. */
static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

/* PUNPKLO/PUNPKHI: unpack the low/high half of Pn to double-width. */
static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
2568
234b48e9
RH
2569/*
2570 *** SVE Permute - Interleaving Group
2571 */
2572
/*
 * ZIP1/ZIP2 (vectors): interleave elements from the low (high=false)
 * or high (high=true) halves of Zn and Zm.
 */
static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Bias both source offsets to select the high half. */
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2590
/* ZIP1 (vectors): interleave from the low halves. */
static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

/* ZIP2 (vectors): interleave from the high halves. */
static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}
2600
74b64b25
RH
/*
 * ZIP1/ZIP2 (quadwords, F64MM): interleave 128-bit elements.  The high
 * half offset is rounded down to a whole number of 256-bit granules.
 */
static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, gen_helper_sve2_zip_q);
    }
    return true;
}

static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, false);
}

static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, true);
}
2626
234b48e9
RH
/* UZP1/UZP2 (vectors): the data argument (0 or one element) selects
 * whether extraction starts at the even or odd element. */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 1 << a->esz)

/* UZP1/UZP2 (quadwords, F64MM): 128-bit elements, so the odd bias is 16. */
TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 0)
TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 16)
74b64b25 2641
234b48e9
RH
/* TRN1/TRN2 (vectors): the data argument selects the even or odd lanes. */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 0)
TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 1 << a->esz)

/* TRN1/TRN2 (quadwords, F64MM): 128-bit elements, so the odd bias is 16. */
TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 0)
TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 16)
74b64b25 2656
3ca879ae
RH
2657/*
2658 *** SVE Permute Vector - Predicated Group
2659 */
2660
/*
 * COMPACT: pack the active elements of Zn to the low end of Zd.
 * Only word and doubleword element sizes exist; b/h are unallocated
 * (NULL entries are rejected by gen_gvec_ool_arg_zpz).
 */
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return gen_gvec_ool_arg_zpz(s, fns[a->esz], a, 0);
}
2668
ef23cb72
RH
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_p);
}
2690
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two size: wrap with a simple mask. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select 0 whenever last has reached vsz. */
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}
2707
/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* Power-of-two size: the mask maps -1 onto the last element. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select the final element offset when last < 0. */
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}
2721
2722/* Load an unsigned element of ESZ from BASE+OFS. */
2723static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2724{
2725 TCGv_i64 r = tcg_temp_new_i64();
2726
2727 switch (esz) {
2728 case 0:
2729 tcg_gen_ld8u_i64(r, base, ofs);
2730 break;
2731 case 1:
2732 tcg_gen_ld16u_i64(r, base, ofs);
2733 break;
2734 case 2:
2735 tcg_gen_ld32u_i64(r, base, ofs);
2736 break;
2737 case 3:
2738 tcg_gen_ld_i64(r, base, ofs);
2739 break;
2740 default:
2741 g_assert_not_reached();
2742 }
2743 return r;
2744}
2745
/* Load an unsigned element of ESZ from RM[LAST].
 * Note that LAST is clobbered on big-endian hosts.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering.  See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2771
/* Compute CLAST for a Zreg.
 * CLASTA/CLASTB select the element after (or at) the last active
 * element of Zm and broadcast it to Zd; if no element is active,
 * Zd takes the value of Zn instead.
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Local temp: it must survive the branch below. */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2819
/* CLASTA (vector): element after the last active one. */
static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}

/* CLASTB (vector): the last active element itself. */
static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}
2829
/* Compute CLAST for a scalar.
 * On exit, REG_VAL holds the selected element of RM if an element was
 * active, or its original value otherwise.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
                        ele, reg_val);

    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2861
/* Compute CLAST for a Vreg.
 * The current low-64-bit value of Vd supplies the fallback when
 * no element is active; the result is written back with write_fp_dreg,
 * which zeroes the high bits.
 */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}
2886
/* Compute CLAST for a Xreg.
 * The fallback value is Xd itself, zero-extended to the element size
 * first so that the no-active-element result is well defined.
 */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        /* Already full width; no extension required. */
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, true);
}
2926
/* Compute LAST for a scalar.
 * Unlike CLAST there is no fallback: a not-found index is wrapped to a
 * valid element offset, so some element of RM is always returned.
 */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
2945
/* Compute LAST for a Vreg. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, true);
}
2966
/* Compute LAST for a Xreg. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, true);
}
2987
/* CPY (scalar, merging): copy Xn/SP into the active elements of Zd. */
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/* CPY (SIMD&FP scalar, merging): copy element 0 of Vn into Zd's
 * active elements.
 */
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
3006
/* REVB: reverse the bytes within each element (h/s/d only). */
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return gen_gvec_ool_arg_zpz(s, fns[a->esz], a, 0);
}

/* REVH: reverse the halfwords within each element (s/d only). */
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return gen_gvec_ool_arg_zpz(s, fns[a->esz], a, 0);
}

/* REVW: reverse the words within each doubleword element. */
static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
{
    return gen_gvec_ool_arg_zpz(s, a->esz == 3 ? gen_helper_sve_revw_d
                                : NULL, a, 0);
}

/* RBIT: reverse the bits within each element. */
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return gen_gvec_ool_arg_zpz(s, fns[a->esz], a, 0);
}
3045
/* SPLICE: concatenate the predicated segment of Zn with leading
 * elements of Zm.
 */
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                          a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

/* SPLICE (SVE2 constructive form): second source is the pair Zn+1. */
static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                          a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
    }
    return true;
}
3066
757f9cff
RH
/*
 *** SVE Integer Compare - Vectors Group
 */

/*
 * Common expansion for predicated vector-vector compares that also
 * produce NZCV flags.  A NULL helper marks an unallocated encoding.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    /* The helper returned the flag word; commit it to NZCV. */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
3109
/* Generate one trans function per compare, dispatching on element size. */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/* Wide-element variants: Zm has 64-bit elements, so the 'd' slot
 * is unallocated (NULL).
 */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
3151
38cadeba
RH
/*
 *** SVE Integer Compare - Immediate Groups
 */

/*
 * Common expansion for predicated compare-with-immediate that also
 * produces NZCV flags; the immediate travels in the simd_desc data field.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    /* The helper returned the flag word; commit it to NZCV. */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
3191
/* Generate one trans function per immediate compare, by element size. */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
3214
/*
 *** SVE Partition Break Group
 */

/*
 * Expand a three-predicate break operation (BRKPA/BRKPB and setting-flags
 * variants).  When a->s is set the flag-setting helper fn_s is used and
 * NZCV is updated from its return value; otherwise the plain helper fn.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, m, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, m, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    return true;
}
3254
/*
 * Expand a two-predicate break operation (BRKA/BRKB/BRKN and their
 * setting-flags variants); same flag handling as do_brk3 above but
 * without the Pm operand.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    return true;
}
3287
/* BRKPA: propagate break after first true element of Pn, guarded by Pm.  */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

/* BRKPB: as BRKPA, but break is propagated before the first true element.  */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

/* BRKA, merging predication.  */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

/* BRKB, merging predication.  */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

/* BRKA, zeroing predication.  */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

/* BRKB, zeroing predication.  */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

/* BRKN: propagate next break.  */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3322
/*
 *** SVE Predicate Count Group
 */

/*
 * Compute into VAL the number of active elements of size ESZ in
 * predicate PN, masked by governing predicate PG.
 * For predicates of at most 8 bytes the count is computed inline with a
 * masked popcount; larger predicates call the sve_cntp helper.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        /* Predicate sizes cannot use simd_desc; encode via PREDDESC.  */
        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));

        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}
3365
/* CNTP: count active predicate elements into a general register.  */
static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
{
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}

/* INCP/DECP (scalar): add/subtract the active element count to Xd.  */
static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}

/* INCP/DECP (vector): add/subtract the count to each element of Zd.  */
static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    /* Byte elements are not supported for the vector form.  */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
    }
    return true;
}

/* SQINCP/UQINCP/SQDECP/UQDECP (32-bit scalar): saturating add/subtract.  */
static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}

/* SQINCP/UQINCP/SQDECP/UQDECP (64-bit scalar): saturating add/subtract.  */
static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}

/* SQINCP/UQINCP/SQDECP/UQDECP (vector): saturating per-element update.  */
static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    /* Byte elements are not supported for the vector form.  */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}
3444
/*
 *** SVE Integer Compare Scalars Group
 */

/*
 * CTERMEQ/CTERMNE: compare two scalars and set NZCV for the loop
 * termination test.  N is the comparison result; V = !N & !C with C
 * left unchanged from the previous flag-setting instruction.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3473
/*
 * WHILE{LT,LE,LO,LS} (SVE) and WHILE{GT,GE,HI,HS} (SVE2): construct a
 * predicate from a scalar loop bound comparison.  The various conditions
 * are compressed into a count of leading true elements, which is then
 * expanded into a predicate by the sve_whilel/sve_whileg helpers.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    /* For 32-bit operands, extend according to signedness.  */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length. This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
3584
14f6dad1
RH
3585static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3586{
3587 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3588 TCGv_i32 t2;
14f6dad1
RH
3589 TCGv_ptr ptr;
3590 unsigned vsz = vec_full_reg_size(s);
3591 unsigned desc = 0;
3592
3593 if (!dc_isar_feature(aa64_sve2, s)) {
3594 return false;
3595 }
3596 if (!sve_access_check(s)) {
3597 return true;
3598 }
3599
3600 op0 = read_cpu_reg(s, a->rn, 1);
3601 op1 = read_cpu_reg(s, a->rm, 1);
3602
4481bbf2 3603 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3604 diff = tcg_temp_new_i64();
3605
3606 if (a->rw) {
3607 /* WHILERW */
3608 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3609 t1 = tcg_temp_new_i64();
3610 tcg_gen_sub_i64(diff, op0, op1);
3611 tcg_gen_sub_i64(t1, op1, op0);
3612 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3613 tcg_temp_free_i64(t1);
3614 /* Round down to a multiple of ESIZE. */
3615 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3616 /* If op1 == op0, diff == 0, and the condition is always true. */
3617 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3618 } else {
3619 /* WHILEWR */
3620 tcg_gen_sub_i64(diff, op1, op0);
3621 /* Round down to a multiple of ESIZE. */
3622 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3623 /* If op0 >= op1, diff <= 0, the condition is always true. */
3624 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3625 }
3626
3627 /* Bound to the maximum. */
3628 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3629
3630 /* Since we're bounded, pass as a 32-bit type. */
3631 t2 = tcg_temp_new_i32();
3632 tcg_gen_extrl_i64_i32(t2, diff);
3633 tcg_temp_free_i64(diff);
3634
3635 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3636 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3637
3638 ptr = tcg_temp_new_ptr();
3639 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3640
4481bbf2 3641 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3642 do_pred_flags(t2);
3643
3644 tcg_temp_free_ptr(ptr);
3645 tcg_temp_free_i32(t2);
14f6dad1
RH
3646 return true;
3647}
3648
/*
 *** SVE Integer Wide Immediate - Unpredicated Group
 */

/* FDUP: broadcast a VFP-encoded floating-point immediate to all elements.  */
static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    /* Byte elements have no FP encoding.  */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate.  */
        imm = vfp_expand_imm(a->esz, a->imm);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
    }
    return true;
}

/* DUP (immediate): broadcast an integer immediate to all elements.  */
static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    /* Bit 13 (the shift) must be zero for byte elements.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);

        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
    }
    return true;
}
3683
/* ADD (vector, immediate).  */
static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* Bit 13 (the shift) must be zero for byte elements.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

/* SUB (vector, immediate): implemented as addition of the negation.  */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}

/* SUBR (vector, immediate): imm - Zn, with the immediate scalar-first.  */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* Bit 13 (the shift) must be zero for byte elements.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}

/* MUL (vector, immediate).  */
static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
3755
/*
 * Expand the saturating add/subtract-immediate instructions;
 * u selects unsigned saturation, d selects subtraction.
 */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    /* Bit 13 (the shift) must be zero for byte elements.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                          tcg_constant_i64(a->imm), u, d);
    }
    return true;
}

static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}

static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}

static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}

static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}

/* Expand a vector-by-immediate operation via an out-of-line helper.  */
static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand the min/max-with-immediate instructions for all element sizes.  */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3815
/*
 * Dot product helpers, indexed by [unsigned][source element size];
 * byte sources accumulate into .s, halfword sources into .d.
 */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)

/*
 * SVE Multiply - Indexed
 */

TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

/* Mixed-sign dot products are gated on the I8MM extension.  */
TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)
16fcfdc7 3840
/* Two-operand by-element (indexed) operations.  */
#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX

/*
 * Two-operand widening by-element operations; TOP selects the
 * bottom/top half and is packed with the index in the desc data.
 */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB

/* Three-operand (accumulating) by-element operations.  */
#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR

/*
 * Three-operand widening by-element operations; TOP selects the
 * bottom/top half and is packed with the index in the desc data.
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB

/*
 * Three-operand complex by-element operations; the rotation is packed
 * with the index in the desc data.
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
3951
/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */

/*
 * Expand FMLA/FMLS (indexed).  The sub flag is packed into bit 0 of the
 * desc data, with the element index in the remaining bits.
 */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses its own FP status with FZ16 semantics.  */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, false);
}

static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, true);
}
3987
/*
 *** SVE Floating Point Multiply Indexed Group
 */

/* FMUL (indexed): multiply each element of Zn by a selected Zm element.  */
static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses its own FP status with FZ16 semantics.  */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4011
/*
 *** SVE Floating Point Fast Reduction Group
 */

typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/*
 * Expand a predicated floating-point horizontal reduction into the
 * scalar register Vd.  The helper performs a pairwise (tree) reduction,
 * so the desc data carries the vector size rounded up to a power of two.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    /* Half-precision uses its own FP status with FZ16 semantics.  */
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}

/* Expand the FP reductions for h/s/d element sizes; bytes are invalid.  */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)   \
{                                                           \
    static gen_helper_fp_reduce * const fns[3] = {          \
        gen_helper_sve_##name##_h,                          \
        gen_helper_sve_##name##_s,                          \
        gen_helper_sve_##name##_d,                          \
    };                                                      \
    if (a->esz == 0) {                                      \
        return false;                                       \
    }                                                       \
    if (sve_access_check(s)) {                              \
        do_reduce(s, a, fns[a->esz - 1]);                   \
    }                                                       \
    return true;                                            \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
4067
/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */

/* Expand an unpredicated FP unary operation via an out-of-line helper.  */
static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    /* Half-precision uses its own FP status with FZ16 semantics.  */
    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}

/* FRECPE: floating-point reciprocal estimate.  */
static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frecpe_h,
        gen_helper_gvec_frecpe_s,
        gen_helper_gvec_frecpe_d,
    };
    /* Byte elements are invalid.  */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}

/* FRSQRTE: floating-point reciprocal square root estimate.  */
static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frsqrte_h,
        gen_helper_gvec_frsqrte_s,
        gen_helper_gvec_frsqrte_d,
    };
    /* Byte elements are invalid.  */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}
4114
/*
 *** SVE Floating Point Compare with Zero Group
 */

/* Expand a predicated FP compare-against-zero, writing a predicate.  */
static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    /* Half-precision uses its own FP status with FZ16 semantics.  */
    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}

/* Expand the FP compare-with-zero instructions; bytes are invalid.  */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)   \
{                                                           \
    static gen_helper_gvec_3_ptr * const fns[3] = {         \
        gen_helper_sve_##name##_h,                          \
        gen_helper_sve_##name##_s,                          \
        gen_helper_sve_##name##_d,                          \
    };                                                      \
    if (a->esz == 0) {                                      \
        return false;                                       \
    }                                                       \
    if (sve_access_check(s)) {                              \
        do_ppz_fp(s, a, fns[a->esz - 1]);                   \
    }                                                       \
    return true;                                            \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
4157
/*
 *** SVE floating-point trig multiply-add coefficient
 */

/* FTMAD: trig multiply-add with coefficient selected by imm.  */
static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_ftmad_h,
        gen_helper_sve_ftmad_s,
        gen_helper_sve_ftmad_d,
    };

    /* Byte elements are invalid.  */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses its own FP status with FZ16 semantics.  */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4184
7f9ddf64
RH
4185/*
4186 *** SVE Floating Point Accumulating Reduction Group
4187 */
4188
3a7be554 4189static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
4190{
4191 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
4192 TCGv_ptr, TCGv_ptr, TCGv_i32);
4193 static fadda_fn * const fns[3] = {
4194 gen_helper_sve_fadda_h,
4195 gen_helper_sve_fadda_s,
4196 gen_helper_sve_fadda_d,
4197 };
4198 unsigned vsz = vec_full_reg_size(s);
4199 TCGv_ptr t_rm, t_pg, t_fpst;
4200 TCGv_i64 t_val;
4201 TCGv_i32 t_desc;
4202
4203 if (a->esz == 0) {
4204 return false;
4205 }
4206 if (!sve_access_check(s)) {
4207 return true;
4208 }
4209
4210 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
4211 t_rm = tcg_temp_new_ptr();
4212 t_pg = tcg_temp_new_ptr();
4213 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
4214 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 4215 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
c6a59b55 4216 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
7f9ddf64
RH
4217
4218 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
4219
7f9ddf64
RH
4220 tcg_temp_free_ptr(t_fpst);
4221 tcg_temp_free_ptr(t_pg);
4222 tcg_temp_free_ptr(t_rm);
4223
4224 write_fp_dreg(s, a->rd, t_val);
4225 tcg_temp_free_i64(t_val);
4226 return true;
4227}
4228
29b80469
RH
4229/*
4230 *** SVE Floating Point Arithmetic - Unpredicated Group
4231 */
4232
4233static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
4234 gen_helper_gvec_3_ptr *fn)
4235{
4236 if (fn == NULL) {
4237 return false;
4238 }
4239 if (sve_access_check(s)) {
4240 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4241 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
4242 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4243 vec_full_reg_offset(s, a->rn),
4244 vec_full_reg_offset(s, a->rm),
4245 status, vsz, vsz, 0, fn);
4246 tcg_temp_free_ptr(status);
4247 }
4248 return true;
4249}
4250
4251
4252#define DO_FP3(NAME, name) \
3a7be554 4253static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
4254{ \
4255 static gen_helper_gvec_3_ptr * const fns[4] = { \
4256 NULL, gen_helper_gvec_##name##_h, \
4257 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
4258 }; \
4259 return do_zzz_fp(s, a, fns[a->esz]); \
4260}
4261
4262DO_FP3(FADD_zzz, fadd)
4263DO_FP3(FSUB_zzz, fsub)
4264DO_FP3(FMUL_zzz, fmul)
4265DO_FP3(FTSMUL, ftsmul)
4266DO_FP3(FRECPS, recps)
4267DO_FP3(FRSQRTS, rsqrts)
4268
4269#undef DO_FP3
4270
ec3b87c2
RH
4271/*
4272 *** SVE Floating Point Arithmetic - Predicated Group
4273 */
4274
4275static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
4276 gen_helper_gvec_4_ptr *fn)
4277{
4278 if (fn == NULL) {
4279 return false;
4280 }
4281 if (sve_access_check(s)) {
4282 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4283 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
4284 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4285 vec_full_reg_offset(s, a->rn),
4286 vec_full_reg_offset(s, a->rm),
4287 pred_full_reg_offset(s, a->pg),
4288 status, vsz, vsz, 0, fn);
4289 tcg_temp_free_ptr(status);
4290 }
4291 return true;
4292}
4293
4294#define DO_FP3(NAME, name) \
3a7be554 4295static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
4296{ \
4297 static gen_helper_gvec_4_ptr * const fns[4] = { \
4298 NULL, gen_helper_sve_##name##_h, \
4299 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4300 }; \
4301 return do_zpzz_fp(s, a, fns[a->esz]); \
4302}
4303
4304DO_FP3(FADD_zpzz, fadd)
4305DO_FP3(FSUB_zpzz, fsub)
4306DO_FP3(FMUL_zpzz, fmul)
4307DO_FP3(FMIN_zpzz, fmin)
4308DO_FP3(FMAX_zpzz, fmax)
4309DO_FP3(FMINNM_zpzz, fminnum)
4310DO_FP3(FMAXNM_zpzz, fmaxnum)
4311DO_FP3(FABD, fabd)
4312DO_FP3(FSCALE, fscalbn)
4313DO_FP3(FDIV, fdiv)
4314DO_FP3(FMULX, fmulx)
4315
4316#undef DO_FP3
8092c6a3 4317
cc48affe
RH
4318typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
4319 TCGv_i64, TCGv_ptr, TCGv_i32);
4320
4321static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
4322 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
4323{
4324 unsigned vsz = vec_full_reg_size(s);
4325 TCGv_ptr t_zd, t_zn, t_pg, status;
4326 TCGv_i32 desc;
4327
4328 t_zd = tcg_temp_new_ptr();
4329 t_zn = tcg_temp_new_ptr();
4330 t_pg = tcg_temp_new_ptr();
4331 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
4332 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
4333 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4334
cdfb22bb 4335 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
c6a59b55 4336 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
cc48affe
RH
4337 fn(t_zd, t_zn, t_pg, scalar, status, desc);
4338
cc48affe
RH
4339 tcg_temp_free_ptr(status);
4340 tcg_temp_free_ptr(t_pg);
4341 tcg_temp_free_ptr(t_zn);
4342 tcg_temp_free_ptr(t_zd);
4343}
4344
4345static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4346 gen_helper_sve_fp2scalar *fn)
4347{
138a1f7b
RH
4348 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4349 tcg_constant_i64(imm), fn);
cc48affe
RH
4350}
4351
4352#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 4353static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
4354{ \
4355 static gen_helper_sve_fp2scalar * const fns[3] = { \
4356 gen_helper_sve_##name##_h, \
4357 gen_helper_sve_##name##_s, \
4358 gen_helper_sve_##name##_d \
4359 }; \
4360 static uint64_t const val[3][2] = { \
4361 { float16_##const0, float16_##const1 }, \
4362 { float32_##const0, float32_##const1 }, \
4363 { float64_##const0, float64_##const1 }, \
4364 }; \
4365 if (a->esz == 0) { \
4366 return false; \
4367 } \
4368 if (sve_access_check(s)) { \
4369 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
4370 } \
4371 return true; \
4372}
4373
cc48affe
RH
4374DO_FP_IMM(FADD, fadds, half, one)
4375DO_FP_IMM(FSUB, fsubs, half, one)
4376DO_FP_IMM(FMUL, fmuls, half, two)
4377DO_FP_IMM(FSUBR, fsubrs, half, one)
4378DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
4379DO_FP_IMM(FMINNM, fminnms, zero, one)
4380DO_FP_IMM(FMAX, fmaxs, zero, one)
4381DO_FP_IMM(FMIN, fmins, zero, one)
4382
4383#undef DO_FP_IMM
4384
abfdefd5
RH
4385static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4386 gen_helper_gvec_4_ptr *fn)
4387{
4388 if (fn == NULL) {
4389 return false;
4390 }
4391 if (sve_access_check(s)) {
4392 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4393 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4394 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4395 vec_full_reg_offset(s, a->rn),
4396 vec_full_reg_offset(s, a->rm),
4397 pred_full_reg_offset(s, a->pg),
4398 status, vsz, vsz, 0, fn);
4399 tcg_temp_free_ptr(status);
4400 }
4401 return true;
4402}
4403
4404#define DO_FPCMP(NAME, name) \
3a7be554 4405static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
4406{ \
4407 static gen_helper_gvec_4_ptr * const fns[4] = { \
4408 NULL, gen_helper_sve_##name##_h, \
4409 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4410 }; \
4411 return do_fp_cmp(s, a, fns[a->esz]); \
4412}
4413
4414DO_FPCMP(FCMGE, fcmge)
4415DO_FPCMP(FCMGT, fcmgt)
4416DO_FPCMP(FCMEQ, fcmeq)
4417DO_FPCMP(FCMNE, fcmne)
4418DO_FPCMP(FCMUO, fcmuo)
4419DO_FPCMP(FACGE, facge)
4420DO_FPCMP(FACGT, facgt)
4421
4422#undef DO_FPCMP
4423
3a7be554 4424static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
4425{
4426 static gen_helper_gvec_4_ptr * const fns[3] = {
4427 gen_helper_sve_fcadd_h,
4428 gen_helper_sve_fcadd_s,
4429 gen_helper_sve_fcadd_d
4430 };
4431
4432 if (a->esz == 0) {
4433 return false;
4434 }
4435 if (sve_access_check(s)) {
4436 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4437 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
4438 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4439 vec_full_reg_offset(s, a->rn),
4440 vec_full_reg_offset(s, a->rm),
4441 pred_full_reg_offset(s, a->pg),
4442 status, vsz, vsz, a->rot, fns[a->esz - 1]);
4443 tcg_temp_free_ptr(status);
4444 }
4445 return true;
4446}
4447
08975da9
RH
4448static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
4449 gen_helper_gvec_5_ptr *fn)
6ceabaad 4450{
08975da9 4451 if (a->esz == 0) {
6ceabaad
RH
4452 return false;
4453 }
08975da9
RH
4454 if (sve_access_check(s)) {
4455 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4456 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4457 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4458 vec_full_reg_offset(s, a->rn),
4459 vec_full_reg_offset(s, a->rm),
4460 vec_full_reg_offset(s, a->ra),
4461 pred_full_reg_offset(s, a->pg),
4462 status, vsz, vsz, 0, fn);
4463 tcg_temp_free_ptr(status);
6ceabaad 4464 }
6ceabaad
RH
4465 return true;
4466}
4467
4468#define DO_FMLA(NAME, name) \
3a7be554 4469static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 4470{ \
08975da9 4471 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
4472 NULL, gen_helper_sve_##name##_h, \
4473 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4474 }; \
4475 return do_fmla(s, a, fns[a->esz]); \
4476}
4477
4478DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4479DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4480DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4481DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4482
4483#undef DO_FMLA
4484
3a7be554 4485static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 4486{
08975da9
RH
4487 static gen_helper_gvec_5_ptr * const fns[4] = {
4488 NULL,
05f48bab
RH
4489 gen_helper_sve_fcmla_zpzzz_h,
4490 gen_helper_sve_fcmla_zpzzz_s,
4491 gen_helper_sve_fcmla_zpzzz_d,
4492 };
4493
4494 if (a->esz == 0) {
4495 return false;
4496 }
4497 if (sve_access_check(s)) {
4498 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4499 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4500 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4501 vec_full_reg_offset(s, a->rn),
4502 vec_full_reg_offset(s, a->rm),
4503 vec_full_reg_offset(s, a->ra),
4504 pred_full_reg_offset(s, a->pg),
4505 status, vsz, vsz, a->rot, fns[a->esz]);
4506 tcg_temp_free_ptr(status);
05f48bab
RH
4507 }
4508 return true;
4509}
4510
3a7be554 4511static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405 4512{
636ddeb1 4513 static gen_helper_gvec_4_ptr * const fns[2] = {
18fc2405
RH
4514 gen_helper_gvec_fcmlah_idx,
4515 gen_helper_gvec_fcmlas_idx,
4516 };
4517
4518 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4519 tcg_debug_assert(a->rd == a->ra);
4520 if (sve_access_check(s)) {
4521 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4522 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
636ddeb1 4523 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
18fc2405
RH
4524 vec_full_reg_offset(s, a->rn),
4525 vec_full_reg_offset(s, a->rm),
636ddeb1 4526 vec_full_reg_offset(s, a->ra),
18fc2405
RH
4527 status, vsz, vsz,
4528 a->index * 4 + a->rot,
4529 fns[a->esz - 1]);
4530 tcg_temp_free_ptr(status);
4531 }
4532 return true;
4533}
4534
8092c6a3
RH
4535/*
4536 *** SVE Floating Point Unary Operations Predicated Group
4537 */
4538
4539static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4540 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4541{
4542 if (sve_access_check(s)) {
4543 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4544 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
4545 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4546 vec_full_reg_offset(s, rn),
4547 pred_full_reg_offset(s, pg),
4548 status, vsz, vsz, 0, fn);
4549 tcg_temp_free_ptr(status);
4550 }
4551 return true;
4552}
4553
3a7be554 4554static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4555{
e4ab5124 4556 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
4557}
4558
3a7be554 4559static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4560{
4561 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4562}
4563
d29b17ca
RH
4564static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
4565{
4566 if (!dc_isar_feature(aa64_sve_bf16, s)) {
4567 return false;
4568 }
4569 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
4570}
4571
3a7be554 4572static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4573{
e4ab5124 4574 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
4575}
4576
3a7be554 4577static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4578{
4579 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4580}
4581
3a7be554 4582static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4583{
4584 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4585}
4586
3a7be554 4587static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4588{
4589 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4590}
4591
3a7be554 4592static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4593{
4594 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4595}
4596
3a7be554 4597static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4598{
4599 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4600}
4601
3a7be554 4602static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4603{
4604 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4605}
4606
3a7be554 4607static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4608{
4609 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4610}
4611
3a7be554 4612static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4613{
4614 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4615}
4616
3a7be554 4617static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4618{
4619 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4620}
4621
3a7be554 4622static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4623{
4624 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4625}
4626
3a7be554 4627static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4628{
4629 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4630}
4631
3a7be554 4632static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4633{
4634 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4635}
4636
3a7be554 4637static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4638{
4639 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4640}
4641
3a7be554 4642static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4643{
4644 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4645}
4646
3a7be554 4647static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4648{
4649 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4650}
4651
3a7be554 4652static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4653{
4654 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4655}
4656
3a7be554 4657static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4658{
4659 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4660}
4661
cda3c753
RH
4662static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4663 gen_helper_sve_frint_h,
4664 gen_helper_sve_frint_s,
4665 gen_helper_sve_frint_d
4666};
4667
3a7be554 4668static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4669{
4670 if (a->esz == 0) {
4671 return false;
4672 }
4673 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4674 frint_fns[a->esz - 1]);
4675}
4676
3a7be554 4677static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4678{
4679 static gen_helper_gvec_3_ptr * const fns[3] = {
4680 gen_helper_sve_frintx_h,
4681 gen_helper_sve_frintx_s,
4682 gen_helper_sve_frintx_d
4683 };
4684 if (a->esz == 0) {
4685 return false;
4686 }
4687 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4688}
4689
95365277
SL
4690static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4691 int mode, gen_helper_gvec_3_ptr *fn)
cda3c753 4692{
cda3c753
RH
4693 if (sve_access_check(s)) {
4694 unsigned vsz = vec_full_reg_size(s);
4695 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4696 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4697
4698 gen_helper_set_rmode(tmode, tmode, status);
4699
4700 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4701 vec_full_reg_offset(s, a->rn),
4702 pred_full_reg_offset(s, a->pg),
95365277 4703 status, vsz, vsz, 0, fn);
cda3c753
RH
4704
4705 gen_helper_set_rmode(tmode, tmode, status);
4706 tcg_temp_free_i32(tmode);
4707 tcg_temp_free_ptr(status);
4708 }
4709 return true;
4710}
4711
3a7be554 4712static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753 4713{
95365277
SL
4714 if (a->esz == 0) {
4715 return false;
4716 }
4717 return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
cda3c753
RH
4718}
4719
3a7be554 4720static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753 4721{
95365277
SL
4722 if (a->esz == 0) {
4723 return false;
4724 }
4725 return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
cda3c753
RH
4726}
4727
3a7be554 4728static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753 4729{
95365277
SL
4730 if (a->esz == 0) {
4731 return false;
4732 }
4733 return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
cda3c753
RH
4734}
4735
3a7be554 4736static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753 4737{
95365277
SL
4738 if (a->esz == 0) {
4739 return false;
4740 }
4741 return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
cda3c753
RH
4742}
4743
3a7be554 4744static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753 4745{
95365277
SL
4746 if (a->esz == 0) {
4747 return false;
4748 }
4749 return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
cda3c753
RH
4750}
4751
3a7be554 4752static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4753{
4754 static gen_helper_gvec_3_ptr * const fns[3] = {
4755 gen_helper_sve_frecpx_h,
4756 gen_helper_sve_frecpx_s,
4757 gen_helper_sve_frecpx_d
4758 };
4759 if (a->esz == 0) {
4760 return false;
4761 }
4762 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4763}
4764
3a7be554 4765static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4766{
4767 static gen_helper_gvec_3_ptr * const fns[3] = {
4768 gen_helper_sve_fsqrt_h,
4769 gen_helper_sve_fsqrt_s,
4770 gen_helper_sve_fsqrt_d
4771 };
4772 if (a->esz == 0) {
4773 return false;
4774 }
4775 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4776}
4777
3a7be554 4778static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4779{
4780 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4781}
4782
3a7be554 4783static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4784{
4785 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4786}
4787
3a7be554 4788static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4789{
4790 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4791}
4792
3a7be554 4793static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4794{
4795 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4796}
4797
3a7be554 4798static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4799{
4800 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4801}
4802
3a7be554 4803static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4804{
4805 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4806}
4807
3a7be554 4808static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4809{
4810 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4811}
4812
3a7be554 4813static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4814{
4815 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4816}
4817
3a7be554 4818static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4819{
4820 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4821}
4822
3a7be554 4823static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4824{
4825 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4826}
4827
3a7be554 4828static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4829{
4830 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4831}
4832
3a7be554 4833static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4834{
4835 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4836}
4837
3a7be554 4838static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4839{
4840 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4841}
4842
3a7be554 4843static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4844{
4845 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4846}
4847
d1822297
RH
4848/*
4849 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4850 */
4851
4852/* Subroutine loading a vector register at VOFS of LEN bytes.
4853 * The load should begin at the address Rn + IMM.
4854 */
4855
19f2acc9 4856static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4857{
19f2acc9
RH
4858 int len_align = QEMU_ALIGN_DOWN(len, 8);
4859 int len_remain = len % 8;
4860 int nparts = len / 8 + ctpop8(len_remain);
d1822297 4861 int midx = get_mem_index(s);
b2aa8879 4862 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4863
b2aa8879
RH
4864 dirty_addr = tcg_temp_new_i64();
4865 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4866 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
b2aa8879 4867 tcg_temp_free_i64(dirty_addr);
d1822297 4868
b2aa8879
RH
4869 /*
4870 * Note that unpredicated load/store of vector/predicate registers
d1822297 4871 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4872 * operations on larger quantities.
d1822297
RH
4873 * Attempt to keep code expansion to a minimum by limiting the
4874 * amount of unrolling done.
4875 */
4876 if (nparts <= 4) {
4877 int i;
4878
b2aa8879 4879 t0 = tcg_temp_new_i64();
d1822297 4880 for (i = 0; i < len_align; i += 8) {
fc313c64 4881 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
d1822297 4882 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 4883 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4884 }
b2aa8879 4885 tcg_temp_free_i64(t0);
d1822297
RH
4886 } else {
4887 TCGLabel *loop = gen_new_label();
4888 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4889
b2aa8879
RH
4890 /* Copy the clean address into a local temp, live across the loop. */
4891 t0 = clean_addr;
4b4dc975 4892 clean_addr = new_tmp_a64_local(s);
b2aa8879 4893 tcg_gen_mov_i64(clean_addr, t0);
d1822297 4894
b2aa8879 4895 gen_set_label(loop);
d1822297 4896
b2aa8879 4897 t0 = tcg_temp_new_i64();
fc313c64 4898 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
b2aa8879 4899 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4900
b2aa8879 4901 tp = tcg_temp_new_ptr();
d1822297
RH
4902 tcg_gen_add_ptr(tp, cpu_env, i);
4903 tcg_gen_addi_ptr(i, i, 8);
4904 tcg_gen_st_i64(t0, tp, vofs);
4905 tcg_temp_free_ptr(tp);
b2aa8879 4906 tcg_temp_free_i64(t0);
d1822297
RH
4907
4908 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4909 tcg_temp_free_ptr(i);
4910 }
4911
b2aa8879
RH
4912 /*
4913 * Predicate register loads can be any multiple of 2.
d1822297
RH
4914 * Note that we still store the entire 64-bit unit into cpu_env.
4915 */
4916 if (len_remain) {
b2aa8879 4917 t0 = tcg_temp_new_i64();
d1822297
RH
4918 switch (len_remain) {
4919 case 2:
4920 case 4:
4921 case 8:
b2aa8879
RH
4922 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4923 MO_LE | ctz32(len_remain));
d1822297
RH
4924 break;
4925
4926 case 6:
4927 t1 = tcg_temp_new_i64();
b2aa8879
RH
4928 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4929 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4930 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
4931 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4932 tcg_temp_free_i64(t1);
4933 break;
4934
4935 default:
4936 g_assert_not_reached();
4937 }
4938 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 4939 tcg_temp_free_i64(t0);
d1822297 4940 }
d1822297
RH
4941}
4942
5047c204 4943/* Similarly for stores. */
19f2acc9 4944static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 4945{
19f2acc9
RH
4946 int len_align = QEMU_ALIGN_DOWN(len, 8);
4947 int len_remain = len % 8;
4948 int nparts = len / 8 + ctpop8(len_remain);
5047c204 4949 int midx = get_mem_index(s);
bba87d0a 4950 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 4951
bba87d0a
RH
4952 dirty_addr = tcg_temp_new_i64();
4953 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4954 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
bba87d0a 4955 tcg_temp_free_i64(dirty_addr);
5047c204
RH
4956
4957 /* Note that unpredicated load/store of vector/predicate registers
4958 * are defined as a stream of bytes, which equates to little-endian
4959 * operations on larger quantities. There is no nice way to force
4960 * a little-endian store for aarch64_be-linux-user out of line.
4961 *
4962 * Attempt to keep code expansion to a minimum by limiting the
4963 * amount of unrolling done.
4964 */
4965 if (nparts <= 4) {
4966 int i;
4967
bba87d0a 4968 t0 = tcg_temp_new_i64();
5047c204
RH
4969 for (i = 0; i < len_align; i += 8) {
4970 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
fc313c64 4971 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
d8227b09 4972 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 4973 }
bba87d0a 4974 tcg_temp_free_i64(t0);
5047c204
RH
4975 } else {
4976 TCGLabel *loop = gen_new_label();
bba87d0a 4977 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 4978
bba87d0a
RH
4979 /* Copy the clean address into a local temp, live across the loop. */
4980 t0 = clean_addr;
4b4dc975 4981 clean_addr = new_tmp_a64_local(s);
bba87d0a 4982 tcg_gen_mov_i64(clean_addr, t0);
5047c204 4983
bba87d0a 4984 gen_set_label(loop);
5047c204 4985
bba87d0a
RH
4986 t0 = tcg_temp_new_i64();
4987 tp = tcg_temp_new_ptr();
4988 tcg_gen_add_ptr(tp, cpu_env, i);
4989 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 4990 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
4991 tcg_temp_free_ptr(tp);
4992
fc313c64 4993 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
bba87d0a
RH
4994 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4995 tcg_temp_free_i64(t0);
5047c204
RH
4996
4997 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4998 tcg_temp_free_ptr(i);
4999 }
5000
5001 /* Predicate register stores can be any multiple of 2. */
5002 if (len_remain) {
bba87d0a 5003 t0 = tcg_temp_new_i64();
5047c204 5004 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
5005
5006 switch (len_remain) {
5007 case 2:
5008 case 4:
5009 case 8:
bba87d0a
RH
5010 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
5011 MO_LE | ctz32(len_remain));
5047c204
RH
5012 break;
5013
5014 case 6:
bba87d0a
RH
5015 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
5016 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 5017 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 5018 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
5019 break;
5020
5021 default:
5022 g_assert_not_reached();
5023 }
bba87d0a 5024 tcg_temp_free_i64(t0);
5047c204 5025 }
5047c204
RH
5026}
5027
3a7be554 5028static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
5029{
5030 if (sve_access_check(s)) {
5031 int size = vec_full_reg_size(s);
5032 int off = vec_full_reg_offset(s, a->rd);
5033 do_ldr(s, off, size, a->rn, a->imm * size);
5034 }
5035 return true;
5036}
5037
3a7be554 5038static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
5039{
5040 if (sve_access_check(s)) {
5041 int size = pred_full_reg_size(s);
5042 int off = pred_full_reg_offset(s, a->rd);
5043 do_ldr(s, off, size, a->rn, a->imm * size);
5044 }
5045 return true;
5046}
c4e7c493 5047
3a7be554 5048static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
5049{
5050 if (sve_access_check(s)) {
5051 int size = vec_full_reg_size(s);
5052 int off = vec_full_reg_offset(s, a->rd);
5053 do_str(s, off, size, a->rn, a->imm * size);
5054 }
5055 return true;
5056}
5057
3a7be554 5058static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
5059{
5060 if (sve_access_check(s)) {
5061 int size = pred_full_reg_size(s);
5062 int off = pred_full_reg_offset(s, a->rd);
5063 do_str(s, off, size, a->rn, a->imm * size);
5064 }
5065 return true;
5066}
5067
c4e7c493
RH
5068/*
5069 *** SVE Memory - Contiguous Load Group
5070 */
5071
5072/* The memory mode of the dtype. */
14776ab5 5073static const MemOp dtype_mop[16] = {
c4e7c493
RH
5074 MO_UB, MO_UB, MO_UB, MO_UB,
5075 MO_SL, MO_UW, MO_UW, MO_UW,
5076 MO_SW, MO_SW, MO_UL, MO_UL,
fc313c64 5077 MO_SB, MO_SB, MO_SB, MO_UQ
c4e7c493
RH
5078};
5079
5080#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
5081
5082/* The vector element size of dtype. */
5083static const uint8_t dtype_esz[16] = {
5084 0, 1, 2, 3,
5085 3, 1, 2, 3,
5086 3, 2, 2, 3,
5087 3, 2, 1, 3
5088};
5089
5090static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
5091 int dtype, uint32_t mte_n, bool is_write,
5092 gen_helper_gvec_mem *fn)
c4e7c493
RH
5093{
5094 unsigned vsz = vec_full_reg_size(s);
5095 TCGv_ptr t_pg;
206adacf 5096 int desc = 0;
c4e7c493 5097
206adacf
RH
5098 /*
5099 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
5100 * registers as pointers, so encode the regno into the data field.
5101 * For consistency, do this even for LD1.
5102 */
9473d0ec 5103 if (s->mte_active[0]) {
206adacf
RH
5104 int msz = dtype_msz(dtype);
5105
5106 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5107 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5108 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5109 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5110 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
206adacf 5111 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
5112 } else {
5113 addr = clean_data_tbi(s, addr);
206adacf 5114 }
9473d0ec 5115
206adacf 5116 desc = simd_desc(vsz, vsz, zt | desc);
c4e7c493
RH
5117 t_pg = tcg_temp_new_ptr();
5118
5119 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
c6a59b55 5120 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));
c4e7c493
RH
5121
5122 tcg_temp_free_ptr(t_pg);
c4e7c493
RH
5123}
5124
c182c6db
RH
5125/* Indexed by [mte][be][dtype][nreg] */
5126static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
5127 { /* mte inactive, little-endian */
5128 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
5129 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
5130 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
5131 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
5132 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
5133
5134 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
5135 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
5136 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
5137 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
5138 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
5139
5140 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
5141 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
5142 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
5143 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
5144 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
5145
5146 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
5147 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
5148 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
5149 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
5150 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
5151
5152 /* mte inactive, big-endian */
5153 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
5154 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
5155 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
5156 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
5157 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
5158
5159 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
5160 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
5161 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
5162 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
5163 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
5164
5165 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
5166 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
5167 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
5168 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
5169 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
5170
5171 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
5172 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
5173 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
5174 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
5175 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
5176
5177 { /* mte active, little-endian */
5178 { { gen_helper_sve_ld1bb_r_mte,
5179 gen_helper_sve_ld2bb_r_mte,
5180 gen_helper_sve_ld3bb_r_mte,
5181 gen_helper_sve_ld4bb_r_mte },
5182 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
5183 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
5184 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
5185
5186 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
5187 { gen_helper_sve_ld1hh_le_r_mte,
5188 gen_helper_sve_ld2hh_le_r_mte,
5189 gen_helper_sve_ld3hh_le_r_mte,
5190 gen_helper_sve_ld4hh_le_r_mte },
5191 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
5192 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
5193
5194 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
5195 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
5196 { gen_helper_sve_ld1ss_le_r_mte,
5197 gen_helper_sve_ld2ss_le_r_mte,
5198 gen_helper_sve_ld3ss_le_r_mte,
5199 gen_helper_sve_ld4ss_le_r_mte },
5200 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
5201
5202 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
5203 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
5204 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
5205 { gen_helper_sve_ld1dd_le_r_mte,
5206 gen_helper_sve_ld2dd_le_r_mte,
5207 gen_helper_sve_ld3dd_le_r_mte,
5208 gen_helper_sve_ld4dd_le_r_mte } },
5209
5210 /* mte active, big-endian */
5211 { { gen_helper_sve_ld1bb_r_mte,
5212 gen_helper_sve_ld2bb_r_mte,
5213 gen_helper_sve_ld3bb_r_mte,
5214 gen_helper_sve_ld4bb_r_mte },
5215 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
5216 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
5217 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
5218
5219 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
5220 { gen_helper_sve_ld1hh_be_r_mte,
5221 gen_helper_sve_ld2hh_be_r_mte,
5222 gen_helper_sve_ld3hh_be_r_mte,
5223 gen_helper_sve_ld4hh_be_r_mte },
5224 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
5225 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
5226
5227 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
5228 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
5229 { gen_helper_sve_ld1ss_be_r_mte,
5230 gen_helper_sve_ld2ss_be_r_mte,
5231 gen_helper_sve_ld3ss_be_r_mte,
5232 gen_helper_sve_ld4ss_be_r_mte },
5233 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
5234
5235 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
5236 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
5237 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
5238 { gen_helper_sve_ld1dd_be_r_mte,
5239 gen_helper_sve_ld2dd_be_r_mte,
5240 gen_helper_sve_ld3dd_be_r_mte,
5241 gen_helper_sve_ld4dd_be_r_mte } } },
5242};
5243
c4e7c493
RH
5244static void do_ld_zpa(DisasContext *s, int zt, int pg,
5245 TCGv_i64 addr, int dtype, int nreg)
5246{
206adacf 5247 gen_helper_gvec_mem *fn
c182c6db 5248 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 5249
206adacf
RH
5250 /*
5251 * While there are holes in the table, they are not
c4e7c493
RH
5252 * accessible via the instruction encoding.
5253 */
5254 assert(fn != NULL);
206adacf 5255 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
5256}
5257
3a7be554 5258static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
5259{
5260 if (a->rm == 31) {
5261 return false;
5262 }
5263 if (sve_access_check(s)) {
5264 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5265 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
5266 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5267 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5268 }
5269 return true;
5270}
5271
3a7be554 5272static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
5273{
5274 if (sve_access_check(s)) {
5275 int vsz = vec_full_reg_size(s);
5276 int elements = vsz >> dtype_esz[a->dtype];
5277 TCGv_i64 addr = new_tmp_a64(s);
5278
5279 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5280 (a->imm * elements * (a->nreg + 1))
5281 << dtype_msz(a->dtype));
5282 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5283 }
5284 return true;
5285}
e2654d75 5286
3a7be554 5287static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 5288{
aa13f7c3
RH
5289 static gen_helper_gvec_mem * const fns[2][2][16] = {
5290 { /* mte inactive, little-endian */
5291 { gen_helper_sve_ldff1bb_r,
5292 gen_helper_sve_ldff1bhu_r,
5293 gen_helper_sve_ldff1bsu_r,
5294 gen_helper_sve_ldff1bdu_r,
5295
5296 gen_helper_sve_ldff1sds_le_r,
5297 gen_helper_sve_ldff1hh_le_r,
5298 gen_helper_sve_ldff1hsu_le_r,
5299 gen_helper_sve_ldff1hdu_le_r,
5300
5301 gen_helper_sve_ldff1hds_le_r,
5302 gen_helper_sve_ldff1hss_le_r,
5303 gen_helper_sve_ldff1ss_le_r,
5304 gen_helper_sve_ldff1sdu_le_r,
5305
5306 gen_helper_sve_ldff1bds_r,
5307 gen_helper_sve_ldff1bss_r,
5308 gen_helper_sve_ldff1bhs_r,
5309 gen_helper_sve_ldff1dd_le_r },
5310
5311 /* mte inactive, big-endian */
5312 { gen_helper_sve_ldff1bb_r,
5313 gen_helper_sve_ldff1bhu_r,
5314 gen_helper_sve_ldff1bsu_r,
5315 gen_helper_sve_ldff1bdu_r,
5316
5317 gen_helper_sve_ldff1sds_be_r,
5318 gen_helper_sve_ldff1hh_be_r,
5319 gen_helper_sve_ldff1hsu_be_r,
5320 gen_helper_sve_ldff1hdu_be_r,
5321
5322 gen_helper_sve_ldff1hds_be_r,
5323 gen_helper_sve_ldff1hss_be_r,
5324 gen_helper_sve_ldff1ss_be_r,
5325 gen_helper_sve_ldff1sdu_be_r,
5326
5327 gen_helper_sve_ldff1bds_r,
5328 gen_helper_sve_ldff1bss_r,
5329 gen_helper_sve_ldff1bhs_r,
5330 gen_helper_sve_ldff1dd_be_r } },
5331
5332 { /* mte active, little-endian */
5333 { gen_helper_sve_ldff1bb_r_mte,
5334 gen_helper_sve_ldff1bhu_r_mte,
5335 gen_helper_sve_ldff1bsu_r_mte,
5336 gen_helper_sve_ldff1bdu_r_mte,
5337
5338 gen_helper_sve_ldff1sds_le_r_mte,
5339 gen_helper_sve_ldff1hh_le_r_mte,
5340 gen_helper_sve_ldff1hsu_le_r_mte,
5341 gen_helper_sve_ldff1hdu_le_r_mte,
5342
5343 gen_helper_sve_ldff1hds_le_r_mte,
5344 gen_helper_sve_ldff1hss_le_r_mte,
5345 gen_helper_sve_ldff1ss_le_r_mte,
5346 gen_helper_sve_ldff1sdu_le_r_mte,
5347
5348 gen_helper_sve_ldff1bds_r_mte,
5349 gen_helper_sve_ldff1bss_r_mte,
5350 gen_helper_sve_ldff1bhs_r_mte,
5351 gen_helper_sve_ldff1dd_le_r_mte },
5352
5353 /* mte active, big-endian */
5354 { gen_helper_sve_ldff1bb_r_mte,
5355 gen_helper_sve_ldff1bhu_r_mte,
5356 gen_helper_sve_ldff1bsu_r_mte,
5357 gen_helper_sve_ldff1bdu_r_mte,
5358
5359 gen_helper_sve_ldff1sds_be_r_mte,
5360 gen_helper_sve_ldff1hh_be_r_mte,
5361 gen_helper_sve_ldff1hsu_be_r_mte,
5362 gen_helper_sve_ldff1hdu_be_r_mte,
5363
5364 gen_helper_sve_ldff1hds_be_r_mte,
5365 gen_helper_sve_ldff1hss_be_r_mte,
5366 gen_helper_sve_ldff1ss_be_r_mte,
5367 gen_helper_sve_ldff1sdu_be_r_mte,
5368
5369 gen_helper_sve_ldff1bds_r_mte,
5370 gen_helper_sve_ldff1bss_r_mte,
5371 gen_helper_sve_ldff1bhs_r_mte,
5372 gen_helper_sve_ldff1dd_be_r_mte } },
e2654d75
RH
5373 };
5374
5375 if (sve_access_check(s)) {
5376 TCGv_i64 addr = new_tmp_a64(s);
5377 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5378 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
aa13f7c3
RH
5379 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5380 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
5381 }
5382 return true;
5383}
5384
3a7be554 5385static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 5386{
aa13f7c3
RH
5387 static gen_helper_gvec_mem * const fns[2][2][16] = {
5388 { /* mte inactive, little-endian */
5389 { gen_helper_sve_ldnf1bb_r,
5390 gen_helper_sve_ldnf1bhu_r,
5391 gen_helper_sve_ldnf1bsu_r,
5392 gen_helper_sve_ldnf1bdu_r,
5393
5394 gen_helper_sve_ldnf1sds_le_r,
5395 gen_helper_sve_ldnf1hh_le_r,
5396 gen_helper_sve_ldnf1hsu_le_r,
5397 gen_helper_sve_ldnf1hdu_le_r,
5398
5399 gen_helper_sve_ldnf1hds_le_r,
5400 gen_helper_sve_ldnf1hss_le_r,
5401 gen_helper_sve_ldnf1ss_le_r,
5402 gen_helper_sve_ldnf1sdu_le_r,
5403
5404 gen_helper_sve_ldnf1bds_r,
5405 gen_helper_sve_ldnf1bss_r,
5406 gen_helper_sve_ldnf1bhs_r,
5407 gen_helper_sve_ldnf1dd_le_r },
5408
5409 /* mte inactive, big-endian */
5410 { gen_helper_sve_ldnf1bb_r,
5411 gen_helper_sve_ldnf1bhu_r,
5412 gen_helper_sve_ldnf1bsu_r,
5413 gen_helper_sve_ldnf1bdu_r,
5414
5415 gen_helper_sve_ldnf1sds_be_r,
5416 gen_helper_sve_ldnf1hh_be_r,
5417 gen_helper_sve_ldnf1hsu_be_r,
5418 gen_helper_sve_ldnf1hdu_be_r,
5419
5420 gen_helper_sve_ldnf1hds_be_r,
5421 gen_helper_sve_ldnf1hss_be_r,
5422 gen_helper_sve_ldnf1ss_be_r,
5423 gen_helper_sve_ldnf1sdu_be_r,
5424
5425 gen_helper_sve_ldnf1bds_r,
5426 gen_helper_sve_ldnf1bss_r,
5427 gen_helper_sve_ldnf1bhs_r,
5428 gen_helper_sve_ldnf1dd_be_r } },
5429
5430 { /* mte inactive, little-endian */
5431 { gen_helper_sve_ldnf1bb_r_mte,
5432 gen_helper_sve_ldnf1bhu_r_mte,
5433 gen_helper_sve_ldnf1bsu_r_mte,
5434 gen_helper_sve_ldnf1bdu_r_mte,
5435
5436 gen_helper_sve_ldnf1sds_le_r_mte,
5437 gen_helper_sve_ldnf1hh_le_r_mte,
5438 gen_helper_sve_ldnf1hsu_le_r_mte,
5439 gen_helper_sve_ldnf1hdu_le_r_mte,
5440
5441 gen_helper_sve_ldnf1hds_le_r_mte,
5442 gen_helper_sve_ldnf1hss_le_r_mte,
5443 gen_helper_sve_ldnf1ss_le_r_mte,
5444 gen_helper_sve_ldnf1sdu_le_r_mte,
5445
5446 gen_helper_sve_ldnf1bds_r_mte,
5447 gen_helper_sve_ldnf1bss_r_mte,
5448 gen_helper_sve_ldnf1bhs_r_mte,
5449 gen_helper_sve_ldnf1dd_le_r_mte },
5450
5451 /* mte inactive, big-endian */
5452 { gen_helper_sve_ldnf1bb_r_mte,
5453 gen_helper_sve_ldnf1bhu_r_mte,
5454 gen_helper_sve_ldnf1bsu_r_mte,
5455 gen_helper_sve_ldnf1bdu_r_mte,
5456
5457 gen_helper_sve_ldnf1sds_be_r_mte,
5458 gen_helper_sve_ldnf1hh_be_r_mte,
5459 gen_helper_sve_ldnf1hsu_be_r_mte,
5460 gen_helper_sve_ldnf1hdu_be_r_mte,
5461
5462 gen_helper_sve_ldnf1hds_be_r_mte,
5463 gen_helper_sve_ldnf1hss_be_r_mte,
5464 gen_helper_sve_ldnf1ss_be_r_mte,
5465 gen_helper_sve_ldnf1sdu_be_r_mte,
5466
5467 gen_helper_sve_ldnf1bds_r_mte,
5468 gen_helper_sve_ldnf1bss_r_mte,
5469 gen_helper_sve_ldnf1bhs_r_mte,
5470 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
5471 };
5472
5473 if (sve_access_check(s)) {
5474 int vsz = vec_full_reg_size(s);
5475 int elements = vsz >> dtype_esz[a->dtype];
5476 int off = (a->imm * elements) << dtype_msz(a->dtype);
5477 TCGv_i64 addr = new_tmp_a64(s);
5478
5479 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
5480 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5481 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
5482 }
5483 return true;
5484}
1a039c7e 5485
c182c6db 5486static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
05abe304 5487{
05abe304
RH
5488 unsigned vsz = vec_full_reg_size(s);
5489 TCGv_ptr t_pg;
7924d239 5490 int poff;
05abe304
RH
5491
5492 /* Load the first quadword using the normal predicated load helpers. */
2a99ab2b
RH
5493 poff = pred_full_reg_offset(s, pg);
5494 if (vsz > 16) {
5495 /*
5496 * Zero-extend the first 16 bits of the predicate into a temporary.
5497 * This avoids triggering an assert making sure we don't have bits
5498 * set within a predicate beyond VQ, but we have lowered VQ to 1
5499 * for this load operation.
5500 */
5501 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5502#if HOST_BIG_ENDIAN
2a99ab2b
RH
5503 poff += 6;
5504#endif
5505 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5506
5507 poff = offsetof(CPUARMState, vfp.preg_tmp);
5508 tcg_gen_st_i64(tmp, cpu_env, poff);
5509 tcg_temp_free_i64(tmp);
5510 }
5511
05abe304 5512 t_pg = tcg_temp_new_ptr();
2a99ab2b 5513 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 5514
c182c6db
RH
5515 gen_helper_gvec_mem *fn
5516 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
7924d239 5517 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
05abe304
RH
5518
5519 tcg_temp_free_ptr(t_pg);
05abe304
RH
5520
5521 /* Replicate that first quadword. */
5522 if (vsz > 16) {
7924d239
RH
5523 int doff = vec_full_reg_offset(s, zt);
5524 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
05abe304
RH
5525 }
5526}
5527
3a7be554 5528static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5529{
5530 if (a->rm == 31) {
5531 return false;
5532 }
5533 if (sve_access_check(s)) {
5534 int msz = dtype_msz(a->dtype);
5535 TCGv_i64 addr = new_tmp_a64(s);
5536 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5537 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5538 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5539 }
5540 return true;
5541}
5542
3a7be554 5543static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5544{
5545 if (sve_access_check(s)) {
5546 TCGv_i64 addr = new_tmp_a64(s);
5547 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5548 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5549 }
5550 return true;
5551}
5552
12c563f6
RH
5553static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
5554{
5555 unsigned vsz = vec_full_reg_size(s);
5556 unsigned vsz_r32;
5557 TCGv_ptr t_pg;
5558 int poff, doff;
5559
5560 if (vsz < 32) {
5561 /*
5562 * Note that this UNDEFINED check comes after CheckSVEEnabled()
5563 * in the ARM pseudocode, which is the sve_access_check() done
5564 * in our caller. We should not now return false from the caller.
5565 */
5566 unallocated_encoding(s);
5567 return;
5568 }
5569
5570 /* Load the first octaword using the normal predicated load helpers. */
5571
5572 poff = pred_full_reg_offset(s, pg);
5573 if (vsz > 32) {
5574 /*
5575 * Zero-extend the first 32 bits of the predicate into a temporary.
5576 * This avoids triggering an assert making sure we don't have bits
5577 * set within a predicate beyond VQ, but we have lowered VQ to 2
5578 * for this load operation.
5579 */
5580 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5581#if HOST_BIG_ENDIAN
12c563f6
RH
5582 poff += 4;
5583#endif
5584 tcg_gen_ld32u_i64(tmp, cpu_env, poff);
5585
5586 poff = offsetof(CPUARMState, vfp.preg_tmp);
5587 tcg_gen_st_i64(tmp, cpu_env, poff);
5588 tcg_temp_free_i64(tmp);
5589 }
5590
5591 t_pg = tcg_temp_new_ptr();
5592 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
5593
5594 gen_helper_gvec_mem *fn
5595 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
5596 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
5597
5598 tcg_temp_free_ptr(t_pg);
5599
5600 /*
5601 * Replicate that first octaword.
5602 * The replication happens in units of 32; if the full vector size
5603 * is not a multiple of 32, the final bits are zeroed.
5604 */
5605 doff = vec_full_reg_offset(s, zt);
5606 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
5607 if (vsz >= 64) {
5608 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
5609 }
5610 vsz -= vsz_r32;
5611 if (vsz) {
5612 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
5613 }
5614}
5615
5616static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5617{
5618 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5619 return false;
5620 }
5621 if (a->rm == 31) {
5622 return false;
5623 }
5624 if (sve_access_check(s)) {
5625 TCGv_i64 addr = new_tmp_a64(s);
5626 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5627 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5628 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5629 }
5630 return true;
5631}
5632
5633static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5634{
5635 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5636 return false;
5637 }
5638 if (sve_access_check(s)) {
5639 TCGv_i64 addr = new_tmp_a64(s);
5640 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5641 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5642 }
5643 return true;
5644}
5645
68459864 5646/* Load and broadcast element. */
3a7be554 5647static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 5648{
68459864
RH
5649 unsigned vsz = vec_full_reg_size(s);
5650 unsigned psz = pred_full_reg_size(s);
5651 unsigned esz = dtype_esz[a->dtype];
d0e372b0 5652 unsigned msz = dtype_msz(a->dtype);
c0ed9166 5653 TCGLabel *over;
4ac430e1 5654 TCGv_i64 temp, clean_addr;
68459864 5655
c0ed9166
RH
5656 if (!sve_access_check(s)) {
5657 return true;
5658 }
5659
5660 over = gen_new_label();
5661
68459864
RH
5662 /* If the guarding predicate has no bits set, no load occurs. */
5663 if (psz <= 8) {
5664 /* Reduce the pred_esz_masks value simply to reduce the
5665 * size of the code generated here.
5666 */
5667 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5668 temp = tcg_temp_new_i64();
5669 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5670 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5671 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5672 tcg_temp_free_i64(temp);
5673 } else {
5674 TCGv_i32 t32 = tcg_temp_new_i32();
5675 find_last_active(s, t32, esz, a->pg);
5676 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5677 tcg_temp_free_i32(t32);
5678 }
5679
5680 /* Load the data. */
5681 temp = tcg_temp_new_i64();
d0e372b0 5682 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5683 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5684
5685 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
0ca0f872 5686 finalize_memop(s, dtype_mop[a->dtype]));
68459864
RH
5687
5688 /* Broadcast to *all* elements. */
5689 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5690 vsz, vsz, temp);
5691 tcg_temp_free_i64(temp);
5692
5693 /* Zero the inactive elements. */
5694 gen_set_label(over);
60245996 5695 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5696}
5697
1a039c7e
RH
5698static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5699 int msz, int esz, int nreg)
5700{
71b9f394
RH
5701 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5702 { { { gen_helper_sve_st1bb_r,
5703 gen_helper_sve_st1bh_r,
5704 gen_helper_sve_st1bs_r,
5705 gen_helper_sve_st1bd_r },
5706 { NULL,
5707 gen_helper_sve_st1hh_le_r,
5708 gen_helper_sve_st1hs_le_r,
5709 gen_helper_sve_st1hd_le_r },
5710 { NULL, NULL,
5711 gen_helper_sve_st1ss_le_r,
5712 gen_helper_sve_st1sd_le_r },
5713 { NULL, NULL, NULL,
5714 gen_helper_sve_st1dd_le_r } },
5715 { { gen_helper_sve_st1bb_r,
5716 gen_helper_sve_st1bh_r,
5717 gen_helper_sve_st1bs_r,
5718 gen_helper_sve_st1bd_r },
5719 { NULL,
5720 gen_helper_sve_st1hh_be_r,
5721 gen_helper_sve_st1hs_be_r,
5722 gen_helper_sve_st1hd_be_r },
5723 { NULL, NULL,
5724 gen_helper_sve_st1ss_be_r,
5725 gen_helper_sve_st1sd_be_r },
5726 { NULL, NULL, NULL,
5727 gen_helper_sve_st1dd_be_r } } },
5728
5729 { { { gen_helper_sve_st1bb_r_mte,
5730 gen_helper_sve_st1bh_r_mte,
5731 gen_helper_sve_st1bs_r_mte,
5732 gen_helper_sve_st1bd_r_mte },
5733 { NULL,
5734 gen_helper_sve_st1hh_le_r_mte,
5735 gen_helper_sve_st1hs_le_r_mte,
5736 gen_helper_sve_st1hd_le_r_mte },
5737 { NULL, NULL,
5738 gen_helper_sve_st1ss_le_r_mte,
5739 gen_helper_sve_st1sd_le_r_mte },
5740 { NULL, NULL, NULL,
5741 gen_helper_sve_st1dd_le_r_mte } },
5742 { { gen_helper_sve_st1bb_r_mte,
5743 gen_helper_sve_st1bh_r_mte,
5744 gen_helper_sve_st1bs_r_mte,
5745 gen_helper_sve_st1bd_r_mte },
5746 { NULL,
5747 gen_helper_sve_st1hh_be_r_mte,
5748 gen_helper_sve_st1hs_be_r_mte,
5749 gen_helper_sve_st1hd_be_r_mte },
5750 { NULL, NULL,
5751 gen_helper_sve_st1ss_be_r_mte,
5752 gen_helper_sve_st1sd_be_r_mte },
5753 { NULL, NULL, NULL,
5754 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5755 };
71b9f394
RH
5756 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5757 { { { gen_helper_sve_st2bb_r,
5758 gen_helper_sve_st2hh_le_r,
5759 gen_helper_sve_st2ss_le_r,
5760 gen_helper_sve_st2dd_le_r },
5761 { gen_helper_sve_st3bb_r,
5762 gen_helper_sve_st3hh_le_r,
5763 gen_helper_sve_st3ss_le_r,
5764 gen_helper_sve_st3dd_le_r },
5765 { gen_helper_sve_st4bb_r,
5766 gen_helper_sve_st4hh_le_r,
5767 gen_helper_sve_st4ss_le_r,
5768 gen_helper_sve_st4dd_le_r } },
5769 { { gen_helper_sve_st2bb_r,
5770 gen_helper_sve_st2hh_be_r,
5771 gen_helper_sve_st2ss_be_r,
5772 gen_helper_sve_st2dd_be_r },
5773 { gen_helper_sve_st3bb_r,
5774 gen_helper_sve_st3hh_be_r,
5775 gen_helper_sve_st3ss_be_r,
5776 gen_helper_sve_st3dd_be_r },
5777 { gen_helper_sve_st4bb_r,
5778 gen_helper_sve_st4hh_be_r,
5779 gen_helper_sve_st4ss_be_r,
5780 gen_helper_sve_st4dd_be_r } } },
5781 { { { gen_helper_sve_st2bb_r_mte,
5782 gen_helper_sve_st2hh_le_r_mte,
5783 gen_helper_sve_st2ss_le_r_mte,
5784 gen_helper_sve_st2dd_le_r_mte },
5785 { gen_helper_sve_st3bb_r_mte,
5786 gen_helper_sve_st3hh_le_r_mte,
5787 gen_helper_sve_st3ss_le_r_mte,
5788 gen_helper_sve_st3dd_le_r_mte },
5789 { gen_helper_sve_st4bb_r_mte,
5790 gen_helper_sve_st4hh_le_r_mte,
5791 gen_helper_sve_st4ss_le_r_mte,
5792 gen_helper_sve_st4dd_le_r_mte } },
5793 { { gen_helper_sve_st2bb_r_mte,
5794 gen_helper_sve_st2hh_be_r_mte,
5795 gen_helper_sve_st2ss_be_r_mte,
5796 gen_helper_sve_st2dd_be_r_mte },
5797 { gen_helper_sve_st3bb_r_mte,
5798 gen_helper_sve_st3hh_be_r_mte,
5799 gen_helper_sve_st3ss_be_r_mte,
5800 gen_helper_sve_st3dd_be_r_mte },
5801 { gen_helper_sve_st4bb_r_mte,
5802 gen_helper_sve_st4hh_be_r_mte,
5803 gen_helper_sve_st4ss_be_r_mte,
5804 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5805 };
5806 gen_helper_gvec_mem *fn;
28d57f2d 5807 int be = s->be_data == MO_BE;
1a039c7e
RH
5808
5809 if (nreg == 0) {
5810 /* ST1 */
71b9f394
RH
5811 fn = fn_single[s->mte_active[0]][be][msz][esz];
5812 nreg = 1;
1a039c7e
RH
5813 } else {
5814 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5815 assert(msz == esz);
71b9f394 5816 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5817 }
5818 assert(fn != NULL);
71b9f394 5819 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5820}
5821
3a7be554 5822static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5823{
5824 if (a->rm == 31 || a->msz > a->esz) {
5825 return false;
5826 }
5827 if (sve_access_check(s)) {
5828 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5829 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5830 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5831 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5832 }
5833 return true;
5834}
5835
3a7be554 5836static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5837{
5838 if (a->msz > a->esz) {
5839 return false;
5840 }
5841 if (sve_access_check(s)) {
5842 int vsz = vec_full_reg_size(s);
5843 int elements = vsz >> a->esz;
5844 TCGv_i64 addr = new_tmp_a64(s);
5845
5846 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5847 (a->imm * elements * (a->nreg + 1)) << a->msz);
5848 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5849 }
5850 return true;
5851}
/*
 *** SVE gather loads / scatter stores
 */
5856
500d0484 5857static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5858 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5859 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5860{
5861 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5862 TCGv_ptr t_zm = tcg_temp_new_ptr();
5863 TCGv_ptr t_pg = tcg_temp_new_ptr();
5864 TCGv_ptr t_zt = tcg_temp_new_ptr();
d28d12f0 5865 int desc = 0;
500d0484 5866
d28d12f0
RH
5867 if (s->mte_active[0]) {
5868 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5869 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5870 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5871 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5872 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
5873 desc <<= SVE_MTEDESC_SHIFT;
5874 }
cdecb3fc 5875 desc = simd_desc(vsz, vsz, desc | scale);
f6dbf62a
RH
5876
5877 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5878 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5879 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
c6a59b55 5880 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
f6dbf62a
RH
5881
5882 tcg_temp_free_ptr(t_zt);
5883 tcg_temp_free_ptr(t_zm);
5884 tcg_temp_free_ptr(t_pg);
f6dbf62a
RH
5885}
5886
d28d12f0
RH
5887/* Indexed by [mte][be][ff][xs][u][msz]. */
5888static gen_helper_gvec_mem_scatter * const
5889gather_load_fn32[2][2][2][2][2][3] = {
5890 { /* MTE Inactive */
5891 { /* Little-endian */
5892 { { { gen_helper_sve_ldbss_zsu,
5893 gen_helper_sve_ldhss_le_zsu,
5894 NULL, },
5895 { gen_helper_sve_ldbsu_zsu,
5896 gen_helper_sve_ldhsu_le_zsu,
5897 gen_helper_sve_ldss_le_zsu, } },
5898 { { gen_helper_sve_ldbss_zss,
5899 gen_helper_sve_ldhss_le_zss,
5900 NULL, },
5901 { gen_helper_sve_ldbsu_zss,
5902 gen_helper_sve_ldhsu_le_zss,
5903 gen_helper_sve_ldss_le_zss, } } },
5904
5905 /* First-fault */
5906 { { { gen_helper_sve_ldffbss_zsu,
5907 gen_helper_sve_ldffhss_le_zsu,
5908 NULL, },
5909 { gen_helper_sve_ldffbsu_zsu,
5910 gen_helper_sve_ldffhsu_le_zsu,
5911 gen_helper_sve_ldffss_le_zsu, } },
5912 { { gen_helper_sve_ldffbss_zss,
5913 gen_helper_sve_ldffhss_le_zss,
5914 NULL, },
5915 { gen_helper_sve_ldffbsu_zss,
5916 gen_helper_sve_ldffhsu_le_zss,
5917 gen_helper_sve_ldffss_le_zss, } } } },
5918
5919 { /* Big-endian */
5920 { { { gen_helper_sve_ldbss_zsu,
5921 gen_helper_sve_ldhss_be_zsu,
5922 NULL, },
5923 { gen_helper_sve_ldbsu_zsu,
5924 gen_helper_sve_ldhsu_be_zsu,
5925 gen_helper_sve_ldss_be_zsu, } },
5926 { { gen_helper_sve_ldbss_zss,
5927 gen_helper_sve_ldhss_be_zss,
5928 NULL, },
5929 { gen_helper_sve_ldbsu_zss,
5930 gen_helper_sve_ldhsu_be_zss,
5931 gen_helper_sve_ldss_be_zss, } } },
5932
5933 /* First-fault */
5934 { { { gen_helper_sve_ldffbss_zsu,
5935 gen_helper_sve_ldffhss_be_zsu,
5936 NULL, },
5937 { gen_helper_sve_ldffbsu_zsu,
5938 gen_helper_sve_ldffhsu_be_zsu,
5939 gen_helper_sve_ldffss_be_zsu, } },
5940 { { gen_helper_sve_ldffbss_zss,
5941 gen_helper_sve_ldffhss_be_zss,
5942 NULL, },
5943 { gen_helper_sve_ldffbsu_zss,
5944 gen_helper_sve_ldffhsu_be_zss,
5945 gen_helper_sve_ldffss_be_zss, } } } } },
5946 { /* MTE Active */
5947 { /* Little-endian */
5948 { { { gen_helper_sve_ldbss_zsu_mte,
5949 gen_helper_sve_ldhss_le_zsu_mte,
5950 NULL, },
5951 { gen_helper_sve_ldbsu_zsu_mte,
5952 gen_helper_sve_ldhsu_le_zsu_mte,
5953 gen_helper_sve_ldss_le_zsu_mte, } },
5954 { { gen_helper_sve_ldbss_zss_mte,
5955 gen_helper_sve_ldhss_le_zss_mte,
5956 NULL, },
5957 { gen_helper_sve_ldbsu_zss_mte,
5958 gen_helper_sve_ldhsu_le_zss_mte,
5959 gen_helper_sve_ldss_le_zss_mte, } } },
5960
5961 /* First-fault */
5962 { { { gen_helper_sve_ldffbss_zsu_mte,
5963 gen_helper_sve_ldffhss_le_zsu_mte,
5964 NULL, },
5965 { gen_helper_sve_ldffbsu_zsu_mte,
5966 gen_helper_sve_ldffhsu_le_zsu_mte,
5967 gen_helper_sve_ldffss_le_zsu_mte, } },
5968 { { gen_helper_sve_ldffbss_zss_mte,
5969 gen_helper_sve_ldffhss_le_zss_mte,
5970 NULL, },
5971 { gen_helper_sve_ldffbsu_zss_mte,
5972 gen_helper_sve_ldffhsu_le_zss_mte,
5973 gen_helper_sve_ldffss_le_zss_mte, } } } },
5974
5975 { /* Big-endian */
5976 { { { gen_helper_sve_ldbss_zsu_mte,
5977 gen_helper_sve_ldhss_be_zsu_mte,
5978 NULL, },
5979 { gen_helper_sve_ldbsu_zsu_mte,
5980 gen_helper_sve_ldhsu_be_zsu_mte,
5981 gen_helper_sve_ldss_be_zsu_mte, } },
5982 { { gen_helper_sve_ldbss_zss_mte,
5983 gen_helper_sve_ldhss_be_zss_mte,
5984 NULL, },
5985 { gen_helper_sve_ldbsu_zss_mte,
5986 gen_helper_sve_ldhsu_be_zss_mte,
5987 gen_helper_sve_ldss_be_zss_mte, } } },
5988
5989 /* First-fault */
5990 { { { gen_helper_sve_ldffbss_zsu_mte,
5991 gen_helper_sve_ldffhss_be_zsu_mte,
5992 NULL, },
5993 { gen_helper_sve_ldffbsu_zsu_mte,
5994 gen_helper_sve_ldffhsu_be_zsu_mte,
5995 gen_helper_sve_ldffss_be_zsu_mte, } },
5996 { { gen_helper_sve_ldffbss_zss_mte,
5997 gen_helper_sve_ldffhss_be_zss_mte,
5998 NULL, },
5999 { gen_helper_sve_ldffbsu_zss_mte,
6000 gen_helper_sve_ldffhsu_be_zss_mte,
6001 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
6002};
6003
/* Note that we overload xs=2 to indicate 64-bit offset. */
/* Indexed by [mte][be][ff][xs][u][msz]. */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
6214
3a7be554 6215static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
6216{
6217 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6218 bool be = s->be_data == MO_BE;
6219 bool mte = s->mte_active[0];
673e9fa6
RH
6220
6221 if (!sve_access_check(s)) {
6222 return true;
6223 }
6224
6225 switch (a->esz) {
6226 case MO_32:
d28d12f0 6227 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6228 break;
6229 case MO_64:
d28d12f0 6230 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6231 break;
6232 }
6233 assert(fn != NULL);
6234
6235 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6236 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
6237 return true;
6238}
6239
3a7be554 6240static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
6241{
6242 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6243 bool be = s->be_data == MO_BE;
6244 bool mte = s->mte_active[0];
673e9fa6
RH
6245
6246 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
6247 return false;
6248 }
6249 if (!sve_access_check(s)) {
6250 return true;
6251 }
6252
6253 switch (a->esz) {
6254 case MO_32:
d28d12f0 6255 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
6256 break;
6257 case MO_64:
d28d12f0 6258 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
6259 break;
6260 }
6261 assert(fn != NULL);
6262
6263 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
6264 * by loading the immediate into the scalar parameter.
6265 */
2ccdf94f
RH
6266 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6267 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
6268 return true;
6269}
6270
cf327449
SL
6271static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
6272{
b17ab470
RH
6273 gen_helper_gvec_mem_scatter *fn = NULL;
6274 bool be = s->be_data == MO_BE;
6275 bool mte = s->mte_active[0];
6276
6277 if (a->esz < a->msz + !a->u) {
6278 return false;
6279 }
cf327449
SL
6280 if (!dc_isar_feature(aa64_sve2, s)) {
6281 return false;
6282 }
b17ab470
RH
6283 if (!sve_access_check(s)) {
6284 return true;
6285 }
6286
6287 switch (a->esz) {
6288 case MO_32:
6289 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
6290 break;
6291 case MO_64:
6292 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
6293 break;
6294 }
6295 assert(fn != NULL);
6296
6297 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6298 cpu_reg(s, a->rm), a->msz, false, fn);
6299 return true;
cf327449
SL
6300}
6301
/* Scatter-store helpers for 32-bit elements, indexed by [mte][be][xs][msz]. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
6335
/* Note that we overload xs=2 to indicate 64-bit offset. */
/* Scatter-store helpers for 64-bit elements, indexed by [mte][be][xs][msz]. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */  /* was mislabeled "MTE Inactive": these are the _mte helpers */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
6393
3a7be554 6394static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 6395{
f6dbf62a 6396 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
6397 bool be = s->be_data == MO_BE;
6398 bool mte = s->mte_active[0];
f6dbf62a
RH
6399
6400 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6401 return false;
6402 }
6403 if (!sve_access_check(s)) {
6404 return true;
6405 }
6406 switch (a->esz) {
6407 case MO_32:
d28d12f0 6408 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
6409 break;
6410 case MO_64:
d28d12f0 6411 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
6412 break;
6413 default:
6414 g_assert_not_reached();
6415 }
6416 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6417 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
6418 return true;
6419}
dec6cf6b 6420
3a7be554 6421static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
6422{
6423 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6424 bool be = s->be_data == MO_BE;
6425 bool mte = s->mte_active[0];
408ecde9
RH
6426
6427 if (a->esz < a->msz) {
6428 return false;
6429 }
6430 if (!sve_access_check(s)) {
6431 return true;
6432 }
6433
6434 switch (a->esz) {
6435 case MO_32:
d28d12f0 6436 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
6437 break;
6438 case MO_64:
d28d12f0 6439 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
6440 break;
6441 }
6442 assert(fn != NULL);
6443
6444 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6445 * by loading the immediate into the scalar parameter.
6446 */
2ccdf94f
RH
6447 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6448 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
6449 return true;
6450}
6451
6ebca45f
SL
6452static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
6453{
b17ab470
RH
6454 gen_helper_gvec_mem_scatter *fn;
6455 bool be = s->be_data == MO_BE;
6456 bool mte = s->mte_active[0];
6457
6458 if (a->esz < a->msz) {
6459 return false;
6460 }
6ebca45f
SL
6461 if (!dc_isar_feature(aa64_sve2, s)) {
6462 return false;
6463 }
b17ab470
RH
6464 if (!sve_access_check(s)) {
6465 return true;
6466 }
6467
6468 switch (a->esz) {
6469 case MO_32:
6470 fn = scatter_store_fn32[mte][be][0][a->msz];
6471 break;
6472 case MO_64:
6473 fn = scatter_store_fn64[mte][be][2][a->msz];
6474 break;
6475 default:
6476 g_assert_not_reached();
6477 }
6478
6479 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6480 cpu_reg(s, a->rm), a->msz, true, fn);
6481 return true;
6ebca45f
SL
6482}
6483
/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU. */
    /* Still perform the SVE access check for its side effects (traps). */
    (void)sve_access_check(s);
    return true;
}
6494
3a7be554 6495static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
6496{
6497 if (a->rm == 31) {
6498 return false;
6499 }
6500 /* Prefetch is a nop within QEMU. */
2f95a3b0 6501 (void)sve_access_check(s);
dec6cf6b
RH
6502 return true;
6503}
/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

/* Unpredicated MOVPRFX: implemented as a plain vector move. */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}
6523
3a7be554 6524static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
6525{
6526 if (sve_access_check(s)) {
6527 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
6528 }
6529 return true;
6530}
6531
/* Predicated zeroing MOVPRFX: copy rn where predicate is set, zero elsewhere. */
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
5dad1ba5
RH
6536
6537/*
6538 * SVE2 Integer Multiply - Unpredicated
6539 */
6540
6541static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
6542{
6543 if (!dc_isar_feature(aa64_sve2, s)) {
6544 return false;
6545 }
6546 if (sve_access_check(s)) {
6547 gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
6548 }
6549 return true;
6550}
6551
/* Signed multiply, returning the high half of the product, per element size. */
static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

/* Unsigned multiply, high half. */
static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

/* Polynomial multiply (bytes only at this encoding). */
TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

/* Signed saturating doubling multiply, high half. */
static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

/* Signed saturating rounding doubling multiply, high half. */
static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6582
d4b1e59d
RH
6583/*
6584 * SVE2 Integer - Predicated
6585 */
6586
6587static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
6588 gen_helper_gvec_4 *fn)
6589{
6590 if (!dc_isar_feature(aa64_sve2, s)) {
6591 return false;
6592 }
6593 return do_zpzz_ool(s, a, fn);
6594}
6595
6596static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6597{
6598 static gen_helper_gvec_4 * const fns[3] = {
6599 gen_helper_sve2_sadalp_zpzz_h,
6600 gen_helper_sve2_sadalp_zpzz_s,
6601 gen_helper_sve2_sadalp_zpzz_d,
6602 };
6603 if (a->esz == 0) {
6604 return false;
6605 }
6606 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6607}
6608
6609static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6610{
6611 static gen_helper_gvec_4 * const fns[3] = {
6612 gen_helper_sve2_uadalp_zpzz_h,
6613 gen_helper_sve2_uadalp_zpzz_s,
6614 gen_helper_sve2_uadalp_zpzz_d,
6615 };
6616 if (a->esz == 0) {
6617 return false;
6618 }
6619 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6620}
db366da8
RH
6621
6622/*
6623 * SVE2 integer unary operations (predicated)
6624 */
6625
6626static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a,
6627 gen_helper_gvec_3 *fn)
6628{
6629 if (!dc_isar_feature(aa64_sve2, s)) {
6630 return false;
6631 }
b051809a 6632 return gen_gvec_ool_arg_zpz(s, fn, a, 0);
db366da8
RH
6633}
6634
6635static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a)
6636{
6637 if (a->esz != 2) {
6638 return false;
6639 }
6640 return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s);
6641}
6642
6643static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a)
6644{
6645 if (a->esz != 2) {
6646 return false;
6647 }
6648 return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s);
6649}
6650
6651static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a)
6652{
6653 static gen_helper_gvec_3 * const fns[4] = {
6654 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
6655 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
6656 };
6657 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6658}
6659
6660static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a)
6661{
6662 static gen_helper_gvec_3 * const fns[4] = {
6663 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
6664 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
6665 };
6666 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6667}
/*
 * Expand one SVE2 predicated two-source instruction: builds the per-size
 * helper table and dispatches through do_sve2_zpzz_ool (which also checks
 * the SVE2 feature bit).
 */
#define DO_SVE2_ZPZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4 * const fns[4] = { \
        gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
    }; \
    return do_sve2_zpzz_ool(s, a, fns[a->esz]); \
}

/* Saturating / rounding shifts. */
DO_SVE2_ZPZZ(SQSHL, sqshl)
DO_SVE2_ZPZZ(SQRSHL, sqrshl)
DO_SVE2_ZPZZ(SRSHL, srshl)

DO_SVE2_ZPZZ(UQSHL, uqshl)
DO_SVE2_ZPZZ(UQRSHL, uqrshl)
DO_SVE2_ZPZZ(URSHL, urshl)

/* Halving add/sub. */
DO_SVE2_ZPZZ(SHADD, shadd)
DO_SVE2_ZPZZ(SRHADD, srhadd)
DO_SVE2_ZPZZ(SHSUB, shsub)

DO_SVE2_ZPZZ(UHADD, uhadd)
DO_SVE2_ZPZZ(URHADD, urhadd)
DO_SVE2_ZPZZ(UHSUB, uhsub)

/* Pairwise arithmetic. */
DO_SVE2_ZPZZ(ADDP, addp)
DO_SVE2_ZPZZ(SMAXP, smaxp)
DO_SVE2_ZPZZ(UMAXP, umaxp)
DO_SVE2_ZPZZ(SMINP, sminp)
DO_SVE2_ZPZZ(UMINP, uminp)

/* Saturating add/sub. */
DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
DO_SVE2_ZPZZ(SUQADD, suqadd)
DO_SVE2_ZPZZ(USQADD, usqadd)
/*
 * SVE2 Widening Integer Arithmetic
 *
 * The final TRANS_FEAT argument is the helper's data field; it appears to
 * select the bottom/top half of each source operand (0 = BB, 3 = TT,
 * 2 = BT, 1 = TB) — NOTE(review): confirm against the helper definitions.
 */

static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL,                    gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL,                    gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL,                    gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL,                    gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL,                    gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL,                    gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL,                        gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL,                        gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL,                        gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

/* Interleaved XOR: EORBT/EORTB share the eoril helpers with data 2 / 1. */
static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
2df3ca55 6805
e3a56131
RH
6806static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6807{
6808 static gen_helper_gvec_3 * const fns[4] = {
6809 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6810 NULL, gen_helper_sve2_pmull_d,
6811 };
6812 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
6813 return false;
6814 }
615f19fe 6815 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
e3a56131
RH
6816}
6817
/* PMULLB/PMULLT: bottom/top polynomial multiply long, gated on SVE2. */
TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
e3a56131 6820
/* Widening add/sub of the bottom (data 0) or top (data 1) narrow operand. */
static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL,                    gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL,                    gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL,                    gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL,                    gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
4269fef1
RH
6848
6849static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6850{
6851 int top = imm & 1;
6852 int shl = imm >> 1;
6853 int halfbits = 4 << vece;
6854
6855 if (top) {
6856 if (shl == halfbits) {
6857 TCGv_vec t = tcg_temp_new_vec_matching(d);
6858 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6859 tcg_gen_and_vec(vece, d, n, t);
6860 tcg_temp_free_vec(t);
6861 } else {
6862 tcg_gen_sari_vec(vece, d, n, halfbits);
6863 tcg_gen_shli_vec(vece, d, d, shl);
6864 }
6865 } else {
6866 tcg_gen_shli_vec(vece, d, n, halfbits);
6867 tcg_gen_sari_vec(vece, d, d, halfbits - shl);
6868 }
6869}
6870
/*
 * Scalar-i64 expander for USHLL[BT]: zero-extend the selected half of each
 * element and shift left by shl, operating on all elements packed in one
 * 64-bit value. imm encoding: bit 0 = top, remaining bits = shift amount.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    /* Mask of where the (shifted) narrow value lands in each wide element. */
    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    /* Net shift: top halves need to come down by a half-element first. */
    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}
6891
/* Element-size specializations used as .fni8 callbacks for GVecGen2i. */

static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
6906
/*
 * Vector expander for USHLL[BT]: zero-extend the bottom (top=0) or top
 * (top=1) half of each element and shift left by shl.
 * imm encoding: bit 0 = top, remaining bits = shift amount.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            /* Full half-element shift: mask in place, no movement needed. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Logical shift down to zero-extend, then shift up by shl. */
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            /* No shift: just mask to the bottom half. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Move bottom half to the top, then logical-shift back down. */
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
6935
/*
 * Common expansion for SSHLL[BT]/USHLL[BT].
 * sel: top (true) or bottom (false) half; uns: unsigned (true) or signed.
 * The immediate passed to the expanders packs (shift << 1) | sel.
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    /* Indexed by [uns][esz]; the widened element size is h/s/d. */
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}
6987
/* SSHLL/USHLL bottom/top: thin dispatchers onto do_sve2_shll_tb. */

static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}

static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}

static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}

static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
cb9c33b8 7007
/* Bit permute instructions, gated on the SVE2 BitPerm feature. */
static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bgrp_fns[a->esz], a, 0)

/* Complex add: data selects the rotation (0 = 90 degrees, 1 = 270). */
static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

/* Saturating complex add, same rotation encoding. */
static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)
38650638 7046
/* Absolute difference and accumulate long; data 0 = bottom, 1 = top. */
static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL,                    gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL,                    gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
b8295dfb
RH
7060
7061static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
7062{
7063 static gen_helper_gvec_4 * const fns[2] = {
7064 gen_helper_sve2_adcl_s,
7065 gen_helper_sve2_adcl_d,
7066 };
7067 /*
7068 * Note that in this case the ESZ field encodes both size and sign.
7069 * Split out 'subtract' into bit 1 of the data field for the helper.
7070 */
eeb4e84d 7071 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
b8295dfb
RH
7072}
7073
eeb4e84d
RH
7074TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
7075TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e
RH
7076
7077static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
7078{
7079 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
7080 return false;
7081 }
7082 if (sve_access_check(s)) {
7083 unsigned vsz = vec_full_reg_size(s);
7084 unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
7085 unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
7086 fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
7087 }
7088 return true;
7089}
7090
7091static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
7092{
7093 return do_sve2_fn2i(s, a, gen_gvec_ssra);
7094}
7095
7096static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
7097{
7098 return do_sve2_fn2i(s, a, gen_gvec_usra);
7099}
7100
7101static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
7102{
7103 return do_sve2_fn2i(s, a, gen_gvec_srsra);
7104}
7105
7106static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
7107{
7108 return do_sve2_fn2i(s, a, gen_gvec_ursra);
7109}
fc12b46a
RH
7110
7111static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
7112{
7113 return do_sve2_fn2i(s, a, gen_gvec_sri);
7114}
7115
7116static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
7117{
7118 return do_sve2_fn2i(s, a, gen_gvec_sli);
7119}
289a1797
RH
7120
7121static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
7122{
7123 if (!dc_isar_feature(aa64_sve2, s)) {
7124 return false;
7125 }
7126 if (sve_access_check(s)) {
7127 gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
7128 }
7129 return true;
7130}
7131
7132static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
7133{
7134 return do_sve2_fn_zzz(s, a, gen_gvec_saba);
7135}
7136
7137static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
7138{
7139 return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
7140}
5ff2838d
RH
7141
7142static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
7143 const GVecGen2 ops[3])
7144{
7145 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
7146 !dc_isar_feature(aa64_sve2, s)) {
7147 return false;
7148 }
7149 if (sve_access_check(s)) {
7150 unsigned vsz = vec_full_reg_size(s);
7151 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
7152 vec_full_reg_offset(s, a->rn),
7153 vsz, vsz, &ops[a->esz]);
7154 }
7155 return true;
7156}
7157
7158static const TCGOpcode sqxtn_list[] = {
7159 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
7160};
7161
7162static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7163{
7164 TCGv_vec t = tcg_temp_new_vec_matching(d);
7165 int halfbits = 4 << vece;
7166 int64_t mask = (1ull << halfbits) - 1;
7167 int64_t min = -1ull << (halfbits - 1);
7168 int64_t max = -min - 1;
7169
7170 tcg_gen_dupi_vec(vece, t, min);
7171 tcg_gen_smax_vec(vece, d, n, t);
7172 tcg_gen_dupi_vec(vece, t, max);
7173 tcg_gen_smin_vec(vece, d, d, t);
7174 tcg_gen_dupi_vec(vece, t, mask);
7175 tcg_gen_and_vec(vece, d, d, t);
7176 tcg_temp_free_vec(t);
7177}
7178
7179static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
7180{
7181 static const GVecGen2 ops[3] = {
7182 { .fniv = gen_sqxtnb_vec,
7183 .opt_opc = sqxtn_list,
7184 .fno = gen_helper_sve2_sqxtnb_h,
7185 .vece = MO_16 },
7186 { .fniv = gen_sqxtnb_vec,
7187 .opt_opc = sqxtn_list,
7188 .fno = gen_helper_sve2_sqxtnb_s,
7189 .vece = MO_32 },
7190 { .fniv = gen_sqxtnb_vec,
7191 .opt_opc = sqxtn_list,
7192 .fno = gen_helper_sve2_sqxtnb_d,
7193 .vece = MO_64 },
7194 };
7195 return do_sve2_narrow_extract(s, a, ops);
7196}
7197
7198static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7199{
7200 TCGv_vec t = tcg_temp_new_vec_matching(d);
7201 int halfbits = 4 << vece;
7202 int64_t mask = (1ull << halfbits) - 1;
7203 int64_t min = -1ull << (halfbits - 1);
7204 int64_t max = -min - 1;
7205
7206 tcg_gen_dupi_vec(vece, t, min);
7207 tcg_gen_smax_vec(vece, n, n, t);
7208 tcg_gen_dupi_vec(vece, t, max);
7209 tcg_gen_smin_vec(vece, n, n, t);
7210 tcg_gen_shli_vec(vece, n, n, halfbits);
7211 tcg_gen_dupi_vec(vece, t, mask);
7212 tcg_gen_bitsel_vec(vece, d, t, d, n);
7213 tcg_temp_free_vec(t);
7214}
7215
7216static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
7217{
7218 static const GVecGen2 ops[3] = {
7219 { .fniv = gen_sqxtnt_vec,
7220 .opt_opc = sqxtn_list,
7221 .load_dest = true,
7222 .fno = gen_helper_sve2_sqxtnt_h,
7223 .vece = MO_16 },
7224 { .fniv = gen_sqxtnt_vec,
7225 .opt_opc = sqxtn_list,
7226 .load_dest = true,
7227 .fno = gen_helper_sve2_sqxtnt_s,
7228 .vece = MO_32 },
7229 { .fniv = gen_sqxtnt_vec,
7230 .opt_opc = sqxtn_list,
7231 .load_dest = true,
7232 .fno = gen_helper_sve2_sqxtnt_d,
7233 .vece = MO_64 },
7234 };
7235 return do_sve2_narrow_extract(s, a, ops);
7236}
7237
7238static const TCGOpcode uqxtn_list[] = {
7239 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
7240};
7241
7242static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7243{
7244 TCGv_vec t = tcg_temp_new_vec_matching(d);
7245 int halfbits = 4 << vece;
7246 int64_t max = (1ull << halfbits) - 1;
7247
7248 tcg_gen_dupi_vec(vece, t, max);
7249 tcg_gen_umin_vec(vece, d, n, t);
7250 tcg_temp_free_vec(t);
7251}
7252
7253static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
7254{
7255 static const GVecGen2 ops[3] = {
7256 { .fniv = gen_uqxtnb_vec,
7257 .opt_opc = uqxtn_list,
7258 .fno = gen_helper_sve2_uqxtnb_h,
7259 .vece = MO_16 },
7260 { .fniv = gen_uqxtnb_vec,
7261 .opt_opc = uqxtn_list,
7262 .fno = gen_helper_sve2_uqxtnb_s,
7263 .vece = MO_32 },
7264 { .fniv = gen_uqxtnb_vec,
7265 .opt_opc = uqxtn_list,
7266 .fno = gen_helper_sve2_uqxtnb_d,
7267 .vece = MO_64 },
7268 };
7269 return do_sve2_narrow_extract(s, a, ops);
7270}
7271
7272static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7273{
7274 TCGv_vec t = tcg_temp_new_vec_matching(d);
7275 int halfbits = 4 << vece;
7276 int64_t max = (1ull << halfbits) - 1;
7277
7278 tcg_gen_dupi_vec(vece, t, max);
7279 tcg_gen_umin_vec(vece, n, n, t);
7280 tcg_gen_shli_vec(vece, n, n, halfbits);
7281 tcg_gen_bitsel_vec(vece, d, t, d, n);
7282 tcg_temp_free_vec(t);
7283}
7284
7285static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
7286{
7287 static const GVecGen2 ops[3] = {
7288 { .fniv = gen_uqxtnt_vec,
7289 .opt_opc = uqxtn_list,
7290 .load_dest = true,
7291 .fno = gen_helper_sve2_uqxtnt_h,
7292 .vece = MO_16 },
7293 { .fniv = gen_uqxtnt_vec,
7294 .opt_opc = uqxtn_list,
7295 .load_dest = true,
7296 .fno = gen_helper_sve2_uqxtnt_s,
7297 .vece = MO_32 },
7298 { .fniv = gen_uqxtnt_vec,
7299 .opt_opc = uqxtn_list,
7300 .load_dest = true,
7301 .fno = gen_helper_sve2_uqxtnt_d,
7302 .vece = MO_64 },
7303 };
7304 return do_sve2_narrow_extract(s, a, ops);
7305}
7306
7307static const TCGOpcode sqxtun_list[] = {
7308 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
7309};
7310
7311static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7312{
7313 TCGv_vec t = tcg_temp_new_vec_matching(d);
7314 int halfbits = 4 << vece;
7315 int64_t max = (1ull << halfbits) - 1;
7316
7317 tcg_gen_dupi_vec(vece, t, 0);
7318 tcg_gen_smax_vec(vece, d, n, t);
7319 tcg_gen_dupi_vec(vece, t, max);
7320 tcg_gen_umin_vec(vece, d, d, t);
7321 tcg_temp_free_vec(t);
7322}
7323
7324static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
7325{
7326 static const GVecGen2 ops[3] = {
7327 { .fniv = gen_sqxtunb_vec,
7328 .opt_opc = sqxtun_list,
7329 .fno = gen_helper_sve2_sqxtunb_h,
7330 .vece = MO_16 },
7331 { .fniv = gen_sqxtunb_vec,
7332 .opt_opc = sqxtun_list,
7333 .fno = gen_helper_sve2_sqxtunb_s,
7334 .vece = MO_32 },
7335 { .fniv = gen_sqxtunb_vec,
7336 .opt_opc = sqxtun_list,
7337 .fno = gen_helper_sve2_sqxtunb_d,
7338 .vece = MO_64 },
7339 };
7340 return do_sve2_narrow_extract(s, a, ops);
7341}
7342
7343static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7344{
7345 TCGv_vec t = tcg_temp_new_vec_matching(d);
7346 int halfbits = 4 << vece;
7347 int64_t max = (1ull << halfbits) - 1;
7348
7349 tcg_gen_dupi_vec(vece, t, 0);
7350 tcg_gen_smax_vec(vece, n, n, t);
7351 tcg_gen_dupi_vec(vece, t, max);
7352 tcg_gen_umin_vec(vece, n, n, t);
7353 tcg_gen_shli_vec(vece, n, n, halfbits);
7354 tcg_gen_bitsel_vec(vece, d, t, d, n);
7355 tcg_temp_free_vec(t);
7356}
7357
7358static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
7359{
7360 static const GVecGen2 ops[3] = {
7361 { .fniv = gen_sqxtunt_vec,
7362 .opt_opc = sqxtun_list,
7363 .load_dest = true,
7364 .fno = gen_helper_sve2_sqxtunt_h,
7365 .vece = MO_16 },
7366 { .fniv = gen_sqxtunt_vec,
7367 .opt_opc = sqxtun_list,
7368 .load_dest = true,
7369 .fno = gen_helper_sve2_sqxtunt_s,
7370 .vece = MO_32 },
7371 { .fniv = gen_sqxtunt_vec,
7372 .opt_opc = sqxtun_list,
7373 .load_dest = true,
7374 .fno = gen_helper_sve2_sqxtunt_d,
7375 .vece = MO_64 },
7376 };
7377 return do_sve2_narrow_extract(s, a, ops);
46d111b2
RH
7378}
7379
7380static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
7381 const GVecGen2i ops[3])
7382{
7383 if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
7384 return false;
7385 }
7386 assert(a->imm > 0 && a->imm <= (8 << a->esz));
7387 if (sve_access_check(s)) {
7388 unsigned vsz = vec_full_reg_size(s);
7389 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
7390 vec_full_reg_offset(s, a->rn),
7391 vsz, vsz, a->imm, &ops[a->esz]);
7392 }
7393 return true;
7394}
7395
7396static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
7397{
7398 int halfbits = 4 << vece;
7399 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
7400
7401 tcg_gen_shri_i64(d, n, shr);
7402 tcg_gen_andi_i64(d, d, mask);
7403}
7404
7405static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7406{
7407 gen_shrnb_i64(MO_16, d, n, shr);
7408}
7409
7410static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7411{
7412 gen_shrnb_i64(MO_32, d, n, shr);
7413}
7414
7415static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7416{
7417 gen_shrnb_i64(MO_64, d, n, shr);
7418}
7419
7420static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
7421{
7422 TCGv_vec t = tcg_temp_new_vec_matching(d);
7423 int halfbits = 4 << vece;
7424 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
7425
7426 tcg_gen_shri_vec(vece, n, n, shr);
7427 tcg_gen_dupi_vec(vece, t, mask);
7428 tcg_gen_and_vec(vece, d, n, t);
7429 tcg_temp_free_vec(t);
7430}
7431
7432static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
7433{
7434 static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
7435 static const GVecGen2i ops[3] = {
7436 { .fni8 = gen_shrnb16_i64,
7437 .fniv = gen_shrnb_vec,
7438 .opt_opc = vec_list,
7439 .fno = gen_helper_sve2_shrnb_h,
7440 .vece = MO_16 },
7441 { .fni8 = gen_shrnb32_i64,
7442 .fniv = gen_shrnb_vec,
7443 .opt_opc = vec_list,
7444 .fno = gen_helper_sve2_shrnb_s,
7445 .vece = MO_32 },
7446 { .fni8 = gen_shrnb64_i64,
7447 .fniv = gen_shrnb_vec,
7448 .opt_opc = vec_list,
7449 .fno = gen_helper_sve2_shrnb_d,
7450 .vece = MO_64 },
7451 };
7452 return do_sve2_shr_narrow(s, a, ops);
7453}
7454
7455static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
7456{
7457 int halfbits = 4 << vece;
7458 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
7459
7460 tcg_gen_shli_i64(n, n, halfbits - shr);
7461 tcg_gen_andi_i64(n, n, ~mask);
7462 tcg_gen_andi_i64(d, d, mask);
7463 tcg_gen_or_i64(d, d, n);
7464}
7465
7466static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7467{
7468 gen_shrnt_i64(MO_16, d, n, shr);
7469}
7470
7471static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7472{
7473 gen_shrnt_i64(MO_32, d, n, shr);
7474}
7475
7476static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7477{
7478 tcg_gen_shri_i64(n, n, shr);
7479 tcg_gen_deposit_i64(d, d, n, 32, 32);
7480}
7481
7482static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
7483{
7484 TCGv_vec t = tcg_temp_new_vec_matching(d);
7485 int halfbits = 4 << vece;
7486 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
7487
7488 tcg_gen_shli_vec(vece, n, n, halfbits - shr);
7489 tcg_gen_dupi_vec(vece, t, mask);
7490 tcg_gen_bitsel_vec(vece, d, t, d, n);
7491 tcg_temp_free_vec(t);
7492}
7493
7494static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
7495{
7496 static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
7497 static const GVecGen2i ops[3] = {
7498 { .fni8 = gen_shrnt16_i64,
7499 .fniv = gen_shrnt_vec,
7500 .opt_opc = vec_list,
7501 .load_dest = true,
7502 .fno = gen_helper_sve2_shrnt_h,
7503 .vece = MO_16 },
7504 { .fni8 = gen_shrnt32_i64,
7505 .fniv = gen_shrnt_vec,
7506 .opt_opc = vec_list,
7507 .load_dest = true,
7508 .fno = gen_helper_sve2_shrnt_s,
7509 .vece = MO_32 },
7510 { .fni8 = gen_shrnt64_i64,
7511 .fniv = gen_shrnt_vec,
7512 .opt_opc = vec_list,
7513 .load_dest = true,
7514 .fno = gen_helper_sve2_shrnt_d,
7515 .vece = MO_64 },
7516 };
7517 return do_sve2_shr_narrow(s, a, ops);
7518}
7519
7520static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
7521{
7522 static const GVecGen2i ops[3] = {
7523 { .fno = gen_helper_sve2_rshrnb_h },
7524 { .fno = gen_helper_sve2_rshrnb_s },
7525 { .fno = gen_helper_sve2_rshrnb_d },
7526 };
7527 return do_sve2_shr_narrow(s, a, ops);
7528}
7529
7530static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
7531{
7532 static const GVecGen2i ops[3] = {
7533 { .fno = gen_helper_sve2_rshrnt_h },
7534 { .fno = gen_helper_sve2_rshrnt_s },
7535 { .fno = gen_helper_sve2_rshrnt_d },
7536 };
7537 return do_sve2_shr_narrow(s, a, ops);
81fd3e6e
RH
7538}
7539
7540static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
7541 TCGv_vec n, int64_t shr)
7542{
7543 TCGv_vec t = tcg_temp_new_vec_matching(d);
7544 int halfbits = 4 << vece;
7545
7546 tcg_gen_sari_vec(vece, n, n, shr);
7547 tcg_gen_dupi_vec(vece, t, 0);
7548 tcg_gen_smax_vec(vece, n, n, t);
7549 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7550 tcg_gen_umin_vec(vece, d, n, t);
7551 tcg_temp_free_vec(t);
7552}
7553
7554static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
7555{
7556 static const TCGOpcode vec_list[] = {
7557 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7558 };
7559 static const GVecGen2i ops[3] = {
7560 { .fniv = gen_sqshrunb_vec,
7561 .opt_opc = vec_list,
7562 .fno = gen_helper_sve2_sqshrunb_h,
7563 .vece = MO_16 },
7564 { .fniv = gen_sqshrunb_vec,
7565 .opt_opc = vec_list,
7566 .fno = gen_helper_sve2_sqshrunb_s,
7567 .vece = MO_32 },
7568 { .fniv = gen_sqshrunb_vec,
7569 .opt_opc = vec_list,
7570 .fno = gen_helper_sve2_sqshrunb_d,
7571 .vece = MO_64 },
7572 };
7573 return do_sve2_shr_narrow(s, a, ops);
7574}
7575
7576static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
7577 TCGv_vec n, int64_t shr)
7578{
7579 TCGv_vec t = tcg_temp_new_vec_matching(d);
7580 int halfbits = 4 << vece;
7581
7582 tcg_gen_sari_vec(vece, n, n, shr);
7583 tcg_gen_dupi_vec(vece, t, 0);
7584 tcg_gen_smax_vec(vece, n, n, t);
7585 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7586 tcg_gen_umin_vec(vece, n, n, t);
7587 tcg_gen_shli_vec(vece, n, n, halfbits);
7588 tcg_gen_bitsel_vec(vece, d, t, d, n);
7589 tcg_temp_free_vec(t);
7590}
7591
7592static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
7593{
7594 static const TCGOpcode vec_list[] = {
7595 INDEX_op_shli_vec, INDEX_op_sari_vec,
7596 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7597 };
7598 static const GVecGen2i ops[3] = {
7599 { .fniv = gen_sqshrunt_vec,
7600 .opt_opc = vec_list,
7601 .load_dest = true,
7602 .fno = gen_helper_sve2_sqshrunt_h,
7603 .vece = MO_16 },
7604 { .fniv = gen_sqshrunt_vec,
7605 .opt_opc = vec_list,
7606 .load_dest = true,
7607 .fno = gen_helper_sve2_sqshrunt_s,
7608 .vece = MO_32 },
7609 { .fniv = gen_sqshrunt_vec,
7610 .opt_opc = vec_list,
7611 .load_dest = true,
7612 .fno = gen_helper_sve2_sqshrunt_d,
7613 .vece = MO_64 },
7614 };
7615 return do_sve2_shr_narrow(s, a, ops);
7616}
7617
7618static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
7619{
7620 static const GVecGen2i ops[3] = {
7621 { .fno = gen_helper_sve2_sqrshrunb_h },
7622 { .fno = gen_helper_sve2_sqrshrunb_s },
7623 { .fno = gen_helper_sve2_sqrshrunb_d },
7624 };
7625 return do_sve2_shr_narrow(s, a, ops);
7626}
7627
7628static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
7629{
7630 static const GVecGen2i ops[3] = {
7631 { .fno = gen_helper_sve2_sqrshrunt_h },
7632 { .fno = gen_helper_sve2_sqrshrunt_s },
7633 { .fno = gen_helper_sve2_sqrshrunt_d },
7634 };
7635 return do_sve2_shr_narrow(s, a, ops);
c13418da
RH
7636}
7637
743bb147
RH
7638static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
7639 TCGv_vec n, int64_t shr)
7640{
7641 TCGv_vec t = tcg_temp_new_vec_matching(d);
7642 int halfbits = 4 << vece;
7643 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7644 int64_t min = -max - 1;
7645
7646 tcg_gen_sari_vec(vece, n, n, shr);
7647 tcg_gen_dupi_vec(vece, t, min);
7648 tcg_gen_smax_vec(vece, n, n, t);
7649 tcg_gen_dupi_vec(vece, t, max);
7650 tcg_gen_smin_vec(vece, n, n, t);
7651 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7652 tcg_gen_and_vec(vece, d, n, t);
7653 tcg_temp_free_vec(t);
7654}
7655
7656static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
7657{
7658 static const TCGOpcode vec_list[] = {
7659 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7660 };
7661 static const GVecGen2i ops[3] = {
7662 { .fniv = gen_sqshrnb_vec,
7663 .opt_opc = vec_list,
7664 .fno = gen_helper_sve2_sqshrnb_h,
7665 .vece = MO_16 },
7666 { .fniv = gen_sqshrnb_vec,
7667 .opt_opc = vec_list,
7668 .fno = gen_helper_sve2_sqshrnb_s,
7669 .vece = MO_32 },
7670 { .fniv = gen_sqshrnb_vec,
7671 .opt_opc = vec_list,
7672 .fno = gen_helper_sve2_sqshrnb_d,
7673 .vece = MO_64 },
7674 };
7675 return do_sve2_shr_narrow(s, a, ops);
7676}
7677
7678static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
7679 TCGv_vec n, int64_t shr)
7680{
7681 TCGv_vec t = tcg_temp_new_vec_matching(d);
7682 int halfbits = 4 << vece;
7683 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7684 int64_t min = -max - 1;
7685
7686 tcg_gen_sari_vec(vece, n, n, shr);
7687 tcg_gen_dupi_vec(vece, t, min);
7688 tcg_gen_smax_vec(vece, n, n, t);
7689 tcg_gen_dupi_vec(vece, t, max);
7690 tcg_gen_smin_vec(vece, n, n, t);
7691 tcg_gen_shli_vec(vece, n, n, halfbits);
7692 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7693 tcg_gen_bitsel_vec(vece, d, t, d, n);
7694 tcg_temp_free_vec(t);
7695}
7696
7697static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
7698{
7699 static const TCGOpcode vec_list[] = {
7700 INDEX_op_shli_vec, INDEX_op_sari_vec,
7701 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7702 };
7703 static const GVecGen2i ops[3] = {
7704 { .fniv = gen_sqshrnt_vec,
7705 .opt_opc = vec_list,
7706 .load_dest = true,
7707 .fno = gen_helper_sve2_sqshrnt_h,
7708 .vece = MO_16 },
7709 { .fniv = gen_sqshrnt_vec,
7710 .opt_opc = vec_list,
7711 .load_dest = true,
7712 .fno = gen_helper_sve2_sqshrnt_s,
7713 .vece = MO_32 },
7714 { .fniv = gen_sqshrnt_vec,
7715 .opt_opc = vec_list,
7716 .load_dest = true,
7717 .fno = gen_helper_sve2_sqshrnt_d,
7718 .vece = MO_64 },
7719 };
7720 return do_sve2_shr_narrow(s, a, ops);
7721}
7722
7723static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
7724{
7725 static const GVecGen2i ops[3] = {
7726 { .fno = gen_helper_sve2_sqrshrnb_h },
7727 { .fno = gen_helper_sve2_sqrshrnb_s },
7728 { .fno = gen_helper_sve2_sqrshrnb_d },
7729 };
7730 return do_sve2_shr_narrow(s, a, ops);
7731}
7732
7733static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
7734{
7735 static const GVecGen2i ops[3] = {
7736 { .fno = gen_helper_sve2_sqrshrnt_h },
7737 { .fno = gen_helper_sve2_sqrshrnt_s },
7738 { .fno = gen_helper_sve2_sqrshrnt_d },
7739 };
7740 return do_sve2_shr_narrow(s, a, ops);
7741}
7742
c13418da
RH
7743static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
7744 TCGv_vec n, int64_t shr)
7745{
7746 TCGv_vec t = tcg_temp_new_vec_matching(d);
7747 int halfbits = 4 << vece;
7748
7749 tcg_gen_shri_vec(vece, n, n, shr);
7750 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7751 tcg_gen_umin_vec(vece, d, n, t);
7752 tcg_temp_free_vec(t);
7753}
7754
7755static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
7756{
7757 static const TCGOpcode vec_list[] = {
7758 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7759 };
7760 static const GVecGen2i ops[3] = {
7761 { .fniv = gen_uqshrnb_vec,
7762 .opt_opc = vec_list,
7763 .fno = gen_helper_sve2_uqshrnb_h,
7764 .vece = MO_16 },
7765 { .fniv = gen_uqshrnb_vec,
7766 .opt_opc = vec_list,
7767 .fno = gen_helper_sve2_uqshrnb_s,
7768 .vece = MO_32 },
7769 { .fniv = gen_uqshrnb_vec,
7770 .opt_opc = vec_list,
7771 .fno = gen_helper_sve2_uqshrnb_d,
7772 .vece = MO_64 },
7773 };
7774 return do_sve2_shr_narrow(s, a, ops);
7775}
7776
7777static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
7778 TCGv_vec n, int64_t shr)
7779{
7780 TCGv_vec t = tcg_temp_new_vec_matching(d);
7781 int halfbits = 4 << vece;
7782
7783 tcg_gen_shri_vec(vece, n, n, shr);
7784 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7785 tcg_gen_umin_vec(vece, n, n, t);
7786 tcg_gen_shli_vec(vece, n, n, halfbits);
7787 tcg_gen_bitsel_vec(vece, d, t, d, n);
7788 tcg_temp_free_vec(t);
7789}
7790
7791static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
7792{
7793 static const TCGOpcode vec_list[] = {
7794 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7795 };
7796 static const GVecGen2i ops[3] = {
7797 { .fniv = gen_uqshrnt_vec,
7798 .opt_opc = vec_list,
7799 .load_dest = true,
7800 .fno = gen_helper_sve2_uqshrnt_h,
7801 .vece = MO_16 },
7802 { .fniv = gen_uqshrnt_vec,
7803 .opt_opc = vec_list,
7804 .load_dest = true,
7805 .fno = gen_helper_sve2_uqshrnt_s,
7806 .vece = MO_32 },
7807 { .fniv = gen_uqshrnt_vec,
7808 .opt_opc = vec_list,
7809 .load_dest = true,
7810 .fno = gen_helper_sve2_uqshrnt_d,
7811 .vece = MO_64 },
7812 };
7813 return do_sve2_shr_narrow(s, a, ops);
7814}
7815
7816static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
7817{
7818 static const GVecGen2i ops[3] = {
7819 { .fno = gen_helper_sve2_uqrshrnb_h },
7820 { .fno = gen_helper_sve2_uqrshrnb_s },
7821 { .fno = gen_helper_sve2_uqrshrnb_d },
7822 };
7823 return do_sve2_shr_narrow(s, a, ops);
7824}
7825
7826static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
7827{
7828 static const GVecGen2i ops[3] = {
7829 { .fno = gen_helper_sve2_uqrshrnt_h },
7830 { .fno = gen_helper_sve2_uqrshrnt_s },
7831 { .fno = gen_helper_sve2_uqrshrnt_d },
7832 };
7833 return do_sve2_shr_narrow(s, a, ops);
5ff2838d 7834}
b87dbeeb 7835
40d5ea50 7836#define DO_SVE2_ZZZ_NARROW(NAME, name) \
bd394cf5 7837 static gen_helper_gvec_3 * const name##_fns[4] = { \
40d5ea50
SL
7838 NULL, gen_helper_sve2_##name##_h, \
7839 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
7840 }; \
bd394cf5
RH
7841 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
7842 name##_fns[a->esz], a, 0)
40d5ea50
SL
7843
7844DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
7845DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
0ea3ff02
SL
7846DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
7847DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
40d5ea50 7848
c3cd6766
SL
7849DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
7850DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
e9443d10
SL
7851DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
7852DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
c3cd6766 7853
e0ae6ec3
SL
7854static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
7855 gen_helper_gvec_flags_4 *fn)
7856{
7857 if (!dc_isar_feature(aa64_sve2, s)) {
7858 return false;
7859 }
7860 return do_ppzz_flags(s, a, fn);
7861}
7862
7863#define DO_SVE2_PPZZ_MATCH(NAME, name) \
7864static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
7865{ \
7866 static gen_helper_gvec_flags_4 * const fns[4] = { \
7867 gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h, \
7868 NULL, NULL \
7869 }; \
7870 return do_sve2_ppzz_flags(s, a, fns[a->esz]); \
7871}
7872
7873DO_SVE2_PPZZ_MATCH(MATCH, match)
7874DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
7875
7d47ac94
SL
7876static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
7877{
7878 static gen_helper_gvec_4 * const fns[2] = {
7879 gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
7880 };
7881 if (a->esz < 2) {
7882 return false;
7883 }
7884 return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
7885}
7886
bd394cf5
RH
7887TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
7888 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7d47ac94 7889
b87dbeeb
SL
7890static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
7891 gen_helper_gvec_4_ptr *fn)
7892{
7893 if (!dc_isar_feature(aa64_sve2, s)) {
7894 return false;
7895 }
7896 return do_zpzz_fp(s, a, fn);
7897}
7898
7899#define DO_SVE2_ZPZZ_FP(NAME, name) \
7900static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
7901{ \
7902 static gen_helper_gvec_4_ptr * const fns[4] = { \
7903 NULL, gen_helper_sve2_##name##_zpzz_h, \
7904 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d \
7905 }; \
7906 return do_sve2_zpzz_fp(s, a, fns[a->esz]); \
7907}
7908
7909DO_SVE2_ZPZZ_FP(FADDP, faddp)
7910DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
7911DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
7912DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
7913DO_SVE2_ZPZZ_FP(FMINP, fminp)
bfc9307e
RH
7914
7915/*
7916 * SVE Integer Multiply-Add (unpredicated)
7917 */
7918
4f26756b
SL
7919static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
7920{
7921 gen_helper_gvec_4_ptr *fn;
7922
7923 switch (a->esz) {
7924 case MO_32:
7925 if (!dc_isar_feature(aa64_sve_f32mm, s)) {
7926 return false;
7927 }
7928 fn = gen_helper_fmmla_s;
7929 break;
7930 case MO_64:
7931 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
7932 return false;
7933 }
7934 fn = gen_helper_fmmla_d;
7935 break;
7936 default:
7937 return false;
7938 }
7939
7940 if (sve_access_check(s)) {
7941 unsigned vsz = vec_full_reg_size(s);
7942 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
7943 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
7944 vec_full_reg_offset(s, a->rn),
7945 vec_full_reg_offset(s, a->rm),
7946 vec_full_reg_offset(s, a->ra),
7947 status, vsz, vsz, 0, fn);
7948 tcg_temp_free_ptr(status);
7949 }
7950 return true;
7951}
7952
eeb4e84d
RH
7953static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
7954 NULL, gen_helper_sve2_sqdmlal_zzzw_h,
7955 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
7956};
7957TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7958 sqdmlal_zzzw_fns[a->esz], a, 0)
7959TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7960 sqdmlal_zzzw_fns[a->esz], a, 3)
7961TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7962 sqdmlal_zzzw_fns[a->esz], a, 2)
7963
7964static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
7965 NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
7966 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
7967};
7968TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7969 sqdmlsl_zzzw_fns[a->esz], a, 0)
7970TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7971 sqdmlsl_zzzw_fns[a->esz], a, 3)
7972TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7973 sqdmlsl_zzzw_fns[a->esz], a, 2)
7974
7975static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
7976 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
7977 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
7978};
7979TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7980 sqrdmlah_fns[a->esz], a, 0)
45a32e80 7981
eeb4e84d
RH
7982static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
7983 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
7984 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
7985};
7986TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7987 sqrdmlsh_fns[a->esz], a, 0)
45a32e80 7988
eeb4e84d
RH
7989static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
7990 NULL, gen_helper_sve2_smlal_zzzw_h,
7991 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
7992};
7993TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7994 smlal_zzzw_fns[a->esz], a, 0)
7995TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7996 smlal_zzzw_fns[a->esz], a, 1)
7997
7998static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
7999 NULL, gen_helper_sve2_umlal_zzzw_h,
8000 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
8001};
8002TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
8003 umlal_zzzw_fns[a->esz], a, 0)
8004TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
8005 umlal_zzzw_fns[a->esz], a, 1)
8006
8007static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
8008 NULL, gen_helper_sve2_smlsl_zzzw_h,
8009 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
8010};
8011TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
8012 smlsl_zzzw_fns[a->esz], a, 0)
8013TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
8014 smlsl_zzzw_fns[a->esz], a, 1)
8015
8016static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
8017 NULL, gen_helper_sve2_umlsl_zzzw_h,
8018 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
8019};
8020TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
8021 umlsl_zzzw_fns[a->esz], a, 0)
8022TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
8023 umlsl_zzzw_fns[a->esz], a, 1)
d782d3ca 8024
5f425b92
RH
8025static gen_helper_gvec_4 * const cmla_fns[] = {
8026 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
8027 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
8028};
8029TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
8030 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
21068f39 8031
5f425b92
RH
8032static gen_helper_gvec_4 * const cdot_fns[] = {
8033 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
8034};
8035TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
8036 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
d782d3ca 8037
5f425b92
RH
8038static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
8039 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
8040 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
8041};
8042TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
8043 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
6a98cb2a 8044
8740d694
RH
8045TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
8046 a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
b2bcd1be 8047
0ea3cdbf
RH
8048TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
8049 gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
3cc7a88e 8050
32e2ad65
RH
8051TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
8052 gen_helper_crypto_aese, a, false)
8053TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
8054 gen_helper_crypto_aese, a, true)
3cc7a88e 8055
32e2ad65
RH
8056TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
8057 gen_helper_crypto_sm4e, a, 0)
8058TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
8059 gen_helper_crypto_sm4ekey, a, 0)
3358eb3f
RH
8060
8061static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
8062{
8063 if (!dc_isar_feature(aa64_sve2_sha3, s)) {
8064 return false;
8065 }
8066 if (sve_access_check(s)) {
8067 gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
8068 }
8069 return true;
8070}
5c1b7226
RH
8071
8072static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
8073{
8074 if (!dc_isar_feature(aa64_sve2, s)) {
8075 return false;
8076 }
8077 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
8078}
8079
d29b17ca
RH
8080static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
8081{
8082 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8083 return false;
8084 }
8085 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
8086}
8087
5c1b7226
RH
8088static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
8089{
8090 if (!dc_isar_feature(aa64_sve2, s)) {
8091 return false;
8092 }
8093 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
8094}
83c2523f
SL
8095
8096static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
8097{
8098 if (!dc_isar_feature(aa64_sve2, s)) {
8099 return false;
8100 }
8101 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
8102}
8103
8104static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
8105{
8106 if (!dc_isar_feature(aa64_sve2, s)) {
8107 return false;
8108 }
8109 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
8110}
95365277
SL
8111
8112static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
8113{
8114 if (!dc_isar_feature(aa64_sve2, s)) {
8115 return false;
8116 }
8117 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
8118}
8119
8120static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
8121{
8122 if (!dc_isar_feature(aa64_sve2, s)) {
8123 return false;
8124 }
8125 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
8126}
631be02e
SL
8127
/*
 * FLOGB (SVE2): predicated FP unary op expanded via an out-of-line
 * helper.  No byte variant exists, so esz==0 is rejected through the
 * NULL table entry.
 */
static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        NULL, gen_helper_flogb_h,
        gen_helper_flogb_s, gen_helper_flogb_d
    };

    if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Half-precision elements use the FP16 flavour of FP status. */
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
50d102bd
SL
8151
/*
 * Expand FMLAL/FMLSL (vectors).  All four B/T add/sub variants share
 * a single helper; the (sel << 1) | sub bits travel in the gvec
 * descriptor's data field.
 */
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           cpu_env, vsz, vsz, (sel << 1) | sub,
                           gen_helper_sve2_fmlal_zzzw_s);
    }
    return true;
}
8168
/* FMLALB: sub=false, sel=false (bottom elements). */
static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, false);
}

/* FMLALT: sub=false, sel=true (top elements). */
static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, true);
}

/* FMLSLB: sub=true, sel=false. */
static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, false);
}

/* FMLSLT: sub=true, sel=true. */
static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, true);
}
8188
/*
 * Expand FMLAL/FMLSL (indexed).  As with the vector form, sub and sel
 * occupy the low descriptor-data bits; the element index is packed
 * above them (a->index << 2).
 */
static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           cpu_env, vsz, vsz,
                           (a->index << 2) | (sel << 1) | sub,
                           gen_helper_sve2_fmlal_zzxw_s);
    }
    return true;
}
8206
/* FMLALB (indexed): sub=false, sel=false (bottom elements). */
static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, false);
}

/* FMLALT (indexed): sub=false, sel=true (top elements). */
static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, true);
}

/* FMLSLB (indexed): sub=true, sel=false. */
static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, false);
}

/* FMLSLT (indexed): sub=true, sel=true. */
static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, true);
}
2323c5ff 8226
eec05e4e
RH
/* 8-bit integer matrix multiply-accumulate, gated on FEAT_I8MM. */
TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_ummla_b, a, 0)
cb8657f7 8233
eec05e4e
RH
/* BFDOT (vectors), gated on the SVE BFloat16 feature. */
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
f3500a25
RH
/* BFDOT (indexed form), using the zzxz argument expander. */
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)
81266a1f 8238
eec05e4e
RH
/* BFMMLA: BFloat16 matrix multiply-accumulate. */
TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfmmla, a, 0)
5693887f
RH
8241
/*
 * Expand BFMLALB/BFMLALT (vectors).  One helper serves both; the sel
 * bit (bottom vs top elements) is the descriptor data.  FP status is
 * the ordinary FPCR flavour.
 */
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, sel,
                           gen_helper_gvec_bfmlal);
        tcg_temp_free_ptr(status);
    }
    return true;
}
8261
/* BFMLALB: sel=false (bottom elements). */
static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, false);
}

/* BFMLALT: sel=true (top elements). */
static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, true);
}
458d0ab6
RH
8271
/*
 * Expand BFMLALB/BFMLALT (indexed).  The element index is packed above
 * the sel bit in the descriptor data: (a->index << 1) | sel.
 */
static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sel,
                           gen_helper_gvec_bfmlal_idx);
        tcg_temp_free_ptr(status);
    }
    return true;
}
8291
/* BFMLALB (indexed): sel=false (bottom elements). */
static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, false);
}

/* BFMLALT (indexed): sel=true (top elements). */
static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, true);
}