]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Move null function and sve check into gen_gvec_ool_zzzp
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
/*
 * Function-pointer signatures used by the trans_* expanders below.
 */

/* gvec expander taking a scalar TCGv_i64 operand:
 * (vece, dofs, aofs, i64, oprsz, maxsz). */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Out-of-line helpers that also return a flags value in the i32 out. */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Contiguous memory helper; NOTE(review): operand order assumed to be
 * (env, pg, addr, desc) from the uses later in this file -- confirm. */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
/* Scatter/gather memory helper, with an additional vector operand. */
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
ccd841c3
RH
49/*
50 * Helpers for extracting complex instruction fields.
51 */
52
53/* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
451e4ffd 56static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
57{
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60}
61
451e4ffd 62static int tszimm_shr(DisasContext *s, int x)
ccd841c3 63{
451e4ffd 64 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
65}
66
67/* See e.g. LSL (immediate, predicated). */
451e4ffd 68static int tszimm_shl(DisasContext *s, int x)
ccd841c3 69{
451e4ffd 70 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
71}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
c4e7c493
RH
84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
86 */
451e4ffd 87static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
88{
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
91}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
103/* Return the offset info CPUARMState of the predicate vector register Pn.
104 * Note for this purpose, FFR is P16.
105 */
106static inline int pred_full_reg_offset(DisasContext *s, int regno)
107{
108 return offsetof(CPUARMState, vfp.pregs[regno]);
109}
110
111/* Return the byte size of the whole predicate register, VL / 64. */
112static inline int pred_full_reg_size(DisasContext *s)
113{
114 return s->sve_len >> 3;
115}
116
516e246a
RH
117/* Round up the size of a register to a size allowed by
118 * the tcg vector infrastructure. Any operation which uses this
119 * size may assume that the bits above pred_full_reg_size are zero,
120 * and must leave them the same way.
121 *
122 * Note that this is not needed for the vector registers as they
123 * are always properly sized for tcg vectors.
124 */
125static int size_for_gvec(int size)
126{
127 if (size <= 8) {
128 return 8;
129 } else {
130 return QEMU_ALIGN_UP(size, 16);
131 }
132}
133
134static int pred_gvec_reg_size(DisasContext *s)
135{
136 return size_for_gvec(pred_full_reg_size(s));
137}
138
/* Invoke an out-of-line helper on 2 Zregs.
 * A NULL fn marks the encoding unallocated for this element size;
 * nothing is emitted when the SVE access check fails. */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* Convenience wrapper taking the decodetree argument struct. */
static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}
/* Invoke an out-of-line helper on 4 Zregs.
 * A NULL fn marks the encoding unallocated for this element size. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* Convenience wrapper taking the decodetree argument struct. */
static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

/* As above, for indexed (by-element) forms: the index is the data word. */
static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}
/* Invoke an out-of-line helper on 2 Zregs and a predicate.
 * A NULL fn marks the encoding unallocated for this element size. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

/* Convenience wrapper taking the decodetree argument struct. */
static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

/* As above, passing the immediate operand as the data word. */
static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}
/* Invoke a vector expander on two Zregs.
 * Note: these gen_gvec_fn_* helpers do not perform the SVE access
 * check themselves; callers do that first (see e.g. do_mov_z). */
static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
                           int esz, int rd, int rn)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn), vsz, vsz);
}

/* Invoke a vector expander on three Zregs. */
static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vsz, vsz);
}

/* Invoke a vector expander on four Zregs. */
static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                             int esz, int rd, int rn, int rm, int ra)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm),
            vec_full_reg_offset(s, ra), vsz, vsz);
}
/* Invoke a vector move on two Zregs.
 * Element size is irrelevant for a move, so MO_8 is used. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
    }
    return true;
}
/* Initialize a Zreg with replications of a 64-bit immediate.
 * No access check here; callers have already performed it. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}
/* Invoke a vector expander on three Pregs.
 * Operates on the rounded-up predicate gvec size; per the comment at
 * size_for_gvec, the bits above pred_full_reg_size are zero and are
 * preserved that way by the bitwise ops this is used for. */
static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    unsigned psz = pred_gvec_reg_size(s);
    gvec_fn(MO_64, pred_full_reg_offset(s, rd),
            pred_full_reg_offset(s, rn),
            pred_full_reg_offset(s, rm), psz, psz);
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}
/* Set the cpu flags as per a return from an SVE helper.
 * The helper packs the flags into a single i32: the value itself is
 * copied to NF, bit 1 supplies ZF, bit 0 supplies CF, and VF is
 * always cleared. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */

/* Single-word variant: the predicate fits in one 64-bit word. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* General variant: 'words' 64-bit predicate words at the given
 * env offsets for the destination and governing predicates. */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
/* For each element size, the bits within a predicate word that are active.
 * One predicate bit per byte: every bit for MO_8, every 2nd for MO_16,
 * every 4th for MO_32, every 8th for MO_64. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
/*
 *** SVE Logical - Unpredicated Group
 */

/* Expand an unpredicated three-Zreg operation via an inline gvec
 * expander.  Always returns true: the encoding is valid for all esz. */
static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
{
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_and);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_or);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}

/* BIC: and-complement, d = n & ~m. */
static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}
d1822297 395
/* XAR element expanders: rotate-right of (n ^ m) by sh in each lane. */

/* Per-byte lane rotate within an i64, synthesized with shifts and
 * masks since there is no 8-bit rotate at the i64 level. */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);         /* low part of each rotation */
    tcg_gen_shli_i64(t, t, 8 - sh);     /* high part of each rotation */
    tcg_gen_andi_i64(d, d, mask);       /* drop bits shifted in from the lane above */
    tcg_gen_andi_i64(t, t, ~mask);      /* drop bits shifted out to the lane below */
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

/* As above, for 16-bit lanes. */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

/* 32-bit lanes map directly onto the i32 rotate. */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

/* 64-bit lanes map directly onto the i64 rotate. */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

/* Host-vector form, for any element size. */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
/* Expand XAR: d = ror(n ^ m, shift), element-wise.
 * Shared with AdvSIMD, hence not static. */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;     /* reduce shift == esize to 0 */

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    /* esz < 0 is an invalid tszimm encoding; XAR requires SVE2. */
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}
/* Expand an SVE2-only four-Zreg operation via an inline gvec expander. */
static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
    }
    return true;
}
/* EOR3: three-way exclusive or, d = n ^ m ^ k. */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* Bitwise op: operate at MO_64 regardless of the insn's esz. */
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_eor3);
}
/* BCAX: bit clear and exclusive or, d = n ^ (m & ~k). */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* Bitwise op: operate at MO_64 regardless of the insn's esz. */
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bcax);
}
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl);
}
/* BSL1N: bitwise select with first input inverted,
 * d = (~n & k) | (m & ~k).
 * NOTE: these expanders write to their n/m operands as scratch. */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* Host has a native bitsel; invert n first and select on k. */
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl1n);
}
/* BSL2N: bitwise select with second input inverted.
 * NOTE: these expanders write to their n/m operands as scratch. */
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        /* Use or-complement when the host supports it directly. */
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* Host has a native bitsel; invert m first and select on k. */
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl2n);
}
/* NBSL: inverted bitwise select, d = ~((n & k) | (m & ~k)).
 * NOTE: the i64 expander writes to its n/m operands as scratch. */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_nbsl);
}
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_add);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}

/* Signed saturating add/sub. */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}

/* Unsigned saturating add/sub. */
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Expand a binary predicated operation via an out-of-line helper;
 * a NULL fn marks the element size unallocated. */
static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
}

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    /* All four entries are non-NULL, so the result can be ignored. */
    gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
/* Expand a predicated binary op, one helper per element size. */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)           \
{                                                                           \
    static gen_helper_gvec_4 * const fns[4] = {                             \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,     \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,     \
    };                                                                      \
    return do_zpzz_ool(s, a, fns[a->esz]);                                  \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

/* Division exists only for 32- and 64-bit elements; the NULL entries
 * make the 8- and 16-bit encodings unallocated. */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    /* do_sel_z returns void, so the access check is done here. */
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

/* Expand a predicated unary op via gen_gvec_ool_arg_zpz, selecting the
 * helper by element size.  NULL entries in the hand-written tables below
 * are rejected by the expander, marking that esz unallocated. */
#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {  \
        gen_helper_##name##_b, gen_helper_##name##_h,   \
        gen_helper_##name##_s, gen_helper_##name##_d,   \
    };                                                  \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

/* FP abs/neg have no byte-sized form. */
static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL,                  gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL,                  gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

/* Sign/zero extensions are valid only for element sizes wider than
 * the source width. */
static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL,                  gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL,                  gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
/*
 *** SVE Integer Reduction Group
 */

/* Reduction helpers return the scalar result in an i64:
 * (result, zn, pg, desc). */
typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    /* A NULL fn marks the element size unallocated. */
    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    /* Pass pointers into env for the Zn and Pg registers. */
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    /* Write the scalar result to Vd. */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

/* Expand a reduction, one helper per element size. */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)               \
{                                                                       \
    static gen_helper_gvec_reduc * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,           \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,           \
    };                                                                  \
    return do_vpz_ool(s, a, fns[a->esz]);                               \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV has no 64-bit form; the NULL entry makes it unallocated. */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    /* The invert flag reaches the helper via the data word. */
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
    }
}
/* SVE2 predicated shifts by immediate.  In each case, a->esz < 0 is an
 * invalid tszimm encoding: the NULL passed to the expander rejects it. */

static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
fe7f8dfb
RH
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/*
 * Predicated shifts by a wide (doubleword) shift-count vector.
 * Only the b/h/s helpers exist, so esz == 3 is rejected here.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1087
d9d78dcc
RH
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

/*
 * Expand an unpredicated shift-by-immediate with an inline gvec expander.
 * Returns false for an invalid tsz encoding (esz < 0).  Note that for an
 * out-of-range immediate this clamps a->imm in place for ASR, and zeroes
 * the destination for the logical shifts.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
1118
/* ASR (immediate, unpredicated). */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

/* LSR (immediate, unpredicated). */
static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

/* LSL (immediate, unpredicated). */
static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
1133
/*
 * Unpredicated shifts by a wide shift-count vector.  There is no _d
 * helper, so the table holds NULL for esz == 3, marking that element
 * size invalid.
 */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {                \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                      \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1147
96a36e4a
RH
1148/*
1149 *** SVE Integer Multiply-Add Group
1150 */
1151
1152static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1153 gen_helper_gvec_5 *fn)
1154{
1155 if (sve_access_check(s)) {
1156 unsigned vsz = vec_full_reg_size(s);
1157 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1158 vec_full_reg_offset(s, a->ra),
1159 vec_full_reg_offset(s, a->rn),
1160 vec_full_reg_offset(s, a->rm),
1161 pred_full_reg_offset(s, a->pg),
1162 vsz, vsz, 0, fn);
1163 }
1164 return true;
1165}
1166
/* Expand MLA/MLS for all four element sizes. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)         \
{                                                                   \
    static gen_helper_gvec_5 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpzzz_ool(s, a, fns[a->esz]);                         \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
1181
9a56c9c3
RH
/*
 *** SVE Index Generation Group
 */

/*
 * Expand INDEX: zd[i] = start + i * incr.  The doubleword helper takes
 * the 64-bit operands directly; the narrower sizes truncate start/incr
 * to 32 bits first.  The caller has already done sve_access_check.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* Truncate the 64-bit inputs to the helper's 32-bit arguments. */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
}
1215
/* INDEX (immediate start, immediate increment). */
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_constant_i64(a->imm1);
        TCGv_i64 incr = tcg_constant_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/* INDEX (immediate start, register increment). */
static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_constant_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/* INDEX (register start, immediate increment). */
static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_constant_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/* INDEX (register start, register increment). */
static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
1255
96f922cc
RH
1256/*
1257 *** SVE Stack Allocation Group
1258 */
1259
3a7be554 1260static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1261{
5de56742
AC
1262 if (sve_access_check(s)) {
1263 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1264 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1265 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1266 }
96f922cc
RH
1267 return true;
1268}
1269
3a7be554 1270static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1271{
5de56742
AC
1272 if (sve_access_check(s)) {
1273 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1274 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1275 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1276 }
96f922cc
RH
1277 return true;
1278}
1279
3a7be554 1280static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1281{
5de56742
AC
1282 if (sve_access_check(s)) {
1283 TCGv_i64 reg = cpu_reg(s, a->rd);
1284 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1285 }
96f922cc
RH
1286 return true;
1287}
1288
4b242d9c
RH
/*
 *** SVE Compute Vector Address Group
 */

/* Expand one of the ADR forms via its out-of-line helper. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1317
0762cd42
RH
/*
 *** SVE Integer Misc - Unpredicated Group
 */

/* FEXPA: no byte-sized form, so index 0 is NULL (invalid encoding). */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)

/* FTSSEL: likewise only h/s/d element sizes exist. */
static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
a1f233f2 1334
516e246a
RH
/*
 *** SVE Predicate Logical Operations Group
 */

/*
 * Expand a predicate logical operation, optionally setting the flags.
 * Without the S bit this is a plain gvec expansion.  With it, the
 * single-word (psz == 8) case computes the operation and the flags
 * inline on i64 temps; larger predicates run the gvec op and then
 * PTEST the result, preserving the governing predicate first if the
 * destination would overwrite it.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        /* No flags to set: straightforward vector expansion. */
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1394
/* pd = pn & pm & pg, one 64-bit predicate word at a time. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of gen_and_pg_i64. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* When no flags are needed, fold redundant-operand cases:
     * AND p, p, p is a move, and a repeated governing predicate
     * reduces the 4-operand op to a plain 3-operand AND.
     */
    if (!a->s) {
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                do_mov_p(s, a->rd, a->rn);
            } else {
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
1435
/* pd = (pn & ~pm) & pg. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of gen_bic_pg_i64. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Without flags, BIC with pg == pn reduces to a plain ANDC. */
    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}

/* pd = (pn ^ pm) & pg. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of gen_eor_pg_i64. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1490
3a7be554 1491static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1492{
516e246a
RH
1493 if (a->s) {
1494 return false;
516e246a 1495 }
d4bc6232
RH
1496 if (sve_access_check(s)) {
1497 unsigned psz = pred_gvec_reg_size(s);
1498 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1499 pred_full_reg_offset(s, a->pg),
1500 pred_full_reg_offset(s, a->rn),
1501 pred_full_reg_offset(s, a->rm), psz, psz);
1502 }
1503 return true;
516e246a
RH
1504}
1505
/* pd = (pn | pm) & pg. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of gen_orr_pg_i64. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Without flags, ORR p, p/?, p, p is simply a move. */
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

/* pd = (pn | ~pm) & pg. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector form of gen_orn_pg_i64. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

/* pd = pg & ~(pn | pm). */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Vector form of gen_nor_pg_i64. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

/* pd = pg & ~(pn & pm). */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Vector form of gen_nand_pg_i64. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1605
9e18d7a6
RH
/*
 *** SVE Predicate Misc Group
 */

/* PTEST: set the NZCV flags from the active elements of pn under pg. */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Single predicate word: test inline on i64 temps. */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1633
028e2a7b
RH
/* See the ARM pseudocode DecodePredCount.
 * Return the number of active elements for PATTERN at this vector
 * length, or 0 if the pattern cannot be satisfied.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default: /* #uimm5 */
        return 0;
    }
    /* Fixed-count patterns are all-or-nothing. */
    return elements >= bound ? bound : 0;
}

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* Uniform pattern: try a gvec dup when the sizes line up. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Store whole words, then the partial last word, then zero the rest. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1751
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

/*
 * Common expansion for PFIRST and PNEXT: call the helper with a
 * PREDDESC descriptor and update the flags from its return value.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1829
24e82e68
RH
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtraction can only underflow: clamp from below. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Addition can only overflow: clamp from above. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1858
/* Similarly with 64-bit values.  Here the overflow cannot be detected
 * by widening, so it is computed explicitly; VAL is again known to be
 * positive.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned: underflow iff reg < val; saturate to 0. */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned: overflow iff sum < reg; saturate to all-ones. */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1904
/* Similarly with a vector and a scalar operand.  Dispatch to the
 * per-element-size saturating helpers; the subtraction case is
 * handled by negating the (positive) scalar for the b/h/s sizes,
 * and by dedicated uqsubi/negated-sqaddi helpers for doublewords.
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
1988
3a7be554 1989static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
1990{
1991 if (sve_access_check(s)) {
1992 unsigned fullsz = vec_full_reg_size(s);
1993 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1994 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1995 }
1996 return true;
1997}
1998
/* INCB etc: add/subtract the scaled element count to Xd. */
static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

/* SQINCB etc (32-bit): saturating inc/dec of the low 32 bits of Xd. */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        /* Even for a zero increment the result is still (un)extended. */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}

/* SQINCB etc (64-bit): saturating inc/dec of Xd. */
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}

/* INCH etc (vector): per-element add/subtract of the scaled count.
 * There is no byte-element form.
 */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        /* Zero increment: plain register move (checks access itself). */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/* SQINCH etc (vector): per-element saturating inc/dec. */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
2096
e1fa1164
RH
2097/*
2098 *** SVE Bitwise Immediate Group
2099 */
2100
2101static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2102{
2103 uint64_t imm;
2104 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2105 extract32(a->dbm, 0, 6),
2106 extract32(a->dbm, 6, 6))) {
2107 return false;
2108 }
2109 if (sve_access_check(s)) {
2110 unsigned vsz = vec_full_reg_size(s);
2111 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
2112 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
2113 }
2114 return true;
2115}
2116
/* AND (immediate): Zd = Zn & bitmask-imm. */
static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

/* ORR (immediate): Zd = Zn | bitmask-imm. */
static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

/* EOR (immediate): Zd = Zn ^ bitmask-imm. */
static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}
2131
/* DUPM: broadcast a decoded logical bitmask immediate into Zd. */
static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        /* Unencodable bitmask immediate. */
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
2145
f25a2361
RH
2146/*
2147 *** SVE Integer Wide Immediate - Predicated Group
2148 */
2149
2150/* Implement all merging copies. This is used for CPY (immediate),
2151 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2152 */
/* Implement all merging copies. This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    /* One out-of-line helper per element size. */
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
}

/* FCPY: merging copy of a floating-point immediate. */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}

/* CPY (immediate), merging form. */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    /* NOTE(review): insn bit 13 appears to be the immediate-shift bit,
     * which cannot apply to byte elements -- confirm against decode. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    }
    return true;
}

/* CPY (immediate), zeroing form: inactive elements become zero. */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    /* Same shift-bit restriction as the merging form above. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2221
b94f8f60
RH
2222/*
2223 *** SVE Permute Extract Group
2224 */
2225
/*
 * EXT: write to Zd the bytes of Zn from offset imm upward, followed by
 * the leading imm bytes of Zm.  An imm at or beyond the vector size
 * clamps n_ofs to 0, i.e. Zd gets Zn unchanged.
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fallback: out-of-line helper, with n_ofs passed as data. */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2255
75114792
SL
2256static bool trans_EXT(DisasContext *s, arg_EXT *a)
2257{
2258 return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
2259}
2260
2261static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
2262{
2263 if (!dc_isar_feature(aa64_sve2, s)) {
2264 return false;
2265 }
2266 return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
2267}
2268
30562ab7
RH
2269/*
2270 *** SVE Permute - Unpredicated Group
2271 */
2272
3a7be554 2273static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2274{
2275 if (sve_access_check(s)) {
2276 unsigned vsz = vec_full_reg_size(s);
2277 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2278 vsz, vsz, cpu_reg_sp(s, a->rn));
2279 }
2280 return true;
2281}
2282
/* DUP (indexed): broadcast element Zn[index] into every element of Zd.
 * Element size and index are jointly encoded in imm. */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* The lowest set bit of imm encodes the element size;
         * the bits above it encode the element index. */
        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2309
/* Common expansion for INSR: call the per-element-size helper with
 * VAL as the value to insert at element 0. */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
}

/* INSR (SIMD&FP scalar): insert the low 64 bits of Vm. */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* INSR (scalar): insert Xm. */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
2349
0ea3cdbf
RH
/* REV (vector): helper per element size. */
static gen_helper_gvec_2 * const rev_fns[4] = {
    gen_helper_sve_rev_b, gen_helper_sve_rev_h,
    gen_helper_sve_rev_s, gen_helper_sve_rev_d
};
TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)

/* TBL: indexed table lookup, indices in Zm. */
static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
    gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
    gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
};
TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)

/* TBL (SVE2): the table is the register pair Zn:Z(n+1). */
static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
    gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
    gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
};
TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
           a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)

/* TBX (SVE2) variant. */
static gen_helper_gvec_3 * const tbx_fns[4] = {
    gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
    gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
};
TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
80a712a2 2374
/* SUNPKLO/HI, UUNPKLO/HI: widen the low (h=0) or high (h=1) half of Zn
 * into full-width elements of Zd; a->u selects unsigned extension. */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },   /* no byte destination: minimum is halfword */
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The high half is selected by offsetting the source by vsz/2. */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2396
d731d8cb
RH
2397/*
2398 *** SVE Permute - Predicates Group
2399 */
2400
/* Common expansion for three-operand predicate permutes (ZIP/UZP/TRN).
 * Predicate sizes may be smaller than SIMD_OPRSZ granularity, so the
 * descriptor is built by hand with the PREDDESC fields rather than
 * simd_desc(); high_odd is passed through as DATA. */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    uint32_t desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));

    fn(t_d, t_n, t_m, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    return true;
}

/* Common expansion for two-operand predicate permutes (REV/PUNPK). */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(t_d, t_n, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}

/* ZIP1 (predicate): interleave low halves. */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

/* ZIP2 (predicate): interleave high halves. */
static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

/* UZP1 (predicate): concatenate even elements. */
static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

/* UZP2 (predicate): concatenate odd elements. */
static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

/* TRN1 (predicate): interleave even element pairs. */
static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

/* TRN2 (predicate): interleave odd element pairs. */
static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

/* REV (predicate): reverse element order. */
static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

/* PUNPKLO: unpack the low half of Pn. */
static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

/* PUNPKHI: unpack the high half of Pn. */
static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
2501
234b48e9
RH
2502/*
2503 *** SVE Permute - Interleaving Group
2504 */
2505
/* ZIP1/ZIP2 (vector): interleave elements from Zn and Zm; HIGH selects
 * the upper halves of the sources via a byte offset of vsz/2. */
static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}

/* ZIP1/ZIP2 (128-bit "q" elements, F64MM): as do_zip but with the high
 * offset aligned down to a multiple of 32 bytes before halving. */
static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, gen_helper_sve2_zip_q);
    }
    return true;
}

static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, false);
}

static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, true);
}
2559
234b48e9
RH
/* UZP1/UZP2 (vector): helper per element size; the DATA argument is the
 * odd-element byte offset (0 for UZP1, one element for UZP2). */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 1 << a->esz)

/* UZP with 128-bit elements (F64MM): DATA offset is 16 bytes for UZP2. */
TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 0)
TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 16)

/* TRN1/TRN2 (vector): same DATA convention as UZP above. */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 0)
TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 0)
TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 16)

/*
 *** SVE Permute Vector - Predicated Group
 */

/* COMPACT: only word and doubleword element sizes exist. */
static gen_helper_gvec_3 * const compact_fns[4] = {
    NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
};
TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
3ca879ae 2598
ef23cb72
RH
2599/* Call the helper that computes the ARM LastActiveElement pseudocode
2600 * function, scaled by the element size. This includes the not found
2601 * indication; e.g. not found for esz=3 is -8.
2602 */
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size. This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_p);
}

/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two size: wrap with a mask. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise wrap via a conditional reset to zero. */
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}

/* Load an unsigned element of ESZ from BASE+OFS. */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}

/* Load an unsigned element of ESZ from RM[LAST]. */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering. See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2701
2702/* Compute CLAST for a Zreg. */
2703static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2704{
2705 TCGv_i32 last;
2706 TCGLabel *over;
2707 TCGv_i64 ele;
2708 unsigned vsz, esz = a->esz;
2709
2710 if (!sve_access_check(s)) {
2711 return true;
2712 }
2713
2714 last = tcg_temp_local_new_i32();
2715 over = gen_new_label();
2716
2717 find_last_active(s, last, esz, a->pg);
2718
2719 /* There is of course no movcond for a 2048-bit vector,
2720 * so we must branch over the actual store.
2721 */
2722 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2723
2724 if (!before) {
2725 incr_last_active(s, last, esz);
2726 }
2727
2728 ele = load_last_active(s, last, a->rm, esz);
2729 tcg_temp_free_i32(last);
2730
2731 vsz = vec_full_reg_size(s);
2732 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2733 tcg_temp_free_i64(ele);
2734
2735 /* If this insn used MOVPRFX, we may need a second move. */
2736 if (a->rd != a->rn) {
2737 TCGLabel *done = gen_new_label();
2738 tcg_gen_br(done);
2739
2740 gen_set_label(over);
2741 do_mov_z(s, a->rd, a->rn);
2742
2743 gen_set_label(done);
2744 } else {
2745 gen_set_label(over);
2746 }
2747 return true;
2748}
2749
3a7be554 2750static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2751{
2752 return do_clast_vector(s, a, false);
2753}
2754
3a7be554 2755static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2756{
2757 return do_clast_vector(s, a, true);
2758}
2759
2760/* Compute CLAST for a scalar. */
2761static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2762 bool before, TCGv_i64 reg_val)
2763{
2764 TCGv_i32 last = tcg_temp_new_i32();
053552d3 2765 TCGv_i64 ele, cmp;
ef23cb72
RH
2766
2767 find_last_active(s, last, esz, pg);
2768
2769 /* Extend the original value of last prior to incrementing. */
2770 cmp = tcg_temp_new_i64();
2771 tcg_gen_ext_i32_i64(cmp, last);
2772
2773 if (!before) {
2774 incr_last_active(s, last, esz);
2775 }
2776
2777 /* The conceit here is that while last < 0 indicates not found, after
2778 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2779 * from which we can load garbage. We then discard the garbage with
2780 * a conditional move.
2781 */
2782 ele = load_last_active(s, last, rm, esz);
2783 tcg_temp_free_i32(last);
2784
053552d3
RH
2785 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2786 ele, reg_val);
ef23cb72 2787
ef23cb72
RH
2788 tcg_temp_free_i64(cmp);
2789 tcg_temp_free_i64(ele);
2790}
2791
2792/* Compute CLAST for a Vreg. */
2793static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2794{
2795 if (sve_access_check(s)) {
2796 int esz = a->esz;
2797 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2798 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2799
2800 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2801 write_fp_dreg(s, a->rd, reg);
2802 tcg_temp_free_i64(reg);
2803 }
2804 return true;
2805}
2806
3a7be554 2807static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2808{
2809 return do_clast_fp(s, a, false);
2810}
2811
3a7be554 2812static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2813{
2814 return do_clast_fp(s, a, true);
2815}
2816
2817/* Compute CLAST for a Xreg. */
2818static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2819{
2820 TCGv_i64 reg;
2821
2822 if (!sve_access_check(s)) {
2823 return true;
2824 }
2825
2826 reg = cpu_reg(s, a->rd);
2827 switch (a->esz) {
2828 case 0:
2829 tcg_gen_ext8u_i64(reg, reg);
2830 break;
2831 case 1:
2832 tcg_gen_ext16u_i64(reg, reg);
2833 break;
2834 case 2:
2835 tcg_gen_ext32u_i64(reg, reg);
2836 break;
2837 case 3:
2838 break;
2839 default:
2840 g_assert_not_reached();
2841 }
2842
2843 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2844 return true;
2845}
2846
3a7be554 2847static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2848{
2849 return do_clast_general(s, a, false);
2850}
2851
3a7be554 2852static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2853{
2854 return do_clast_general(s, a, true);
2855}
2856
2857/* Compute LAST for a scalar. */
2858static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2859 int pg, int rm, bool before)
2860{
2861 TCGv_i32 last = tcg_temp_new_i32();
2862 TCGv_i64 ret;
2863
2864 find_last_active(s, last, esz, pg);
2865 if (before) {
2866 wrap_last_active(s, last, esz);
2867 } else {
2868 incr_last_active(s, last, esz);
2869 }
2870
2871 ret = load_last_active(s, last, rm, esz);
2872 tcg_temp_free_i32(last);
2873 return ret;
2874}
2875
2876/* Compute LAST for a Vreg. */
2877static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2878{
2879 if (sve_access_check(s)) {
2880 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2881 write_fp_dreg(s, a->rd, val);
2882 tcg_temp_free_i64(val);
2883 }
2884 return true;
2885}
2886
3a7be554 2887static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2888{
2889 return do_last_fp(s, a, false);
2890}
2891
3a7be554 2892static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2893{
2894 return do_last_fp(s, a, true);
2895}
2896
2897/* Compute LAST for a Xreg. */
2898static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2899{
2900 if (sve_access_check(s)) {
2901 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2902 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2903 tcg_temp_free_i64(val);
2904 }
2905 return true;
2906}
2907
3a7be554 2908static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2909{
2910 return do_last_general(s, a, false);
2911}
2912
3a7be554 2913static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2914{
2915 return do_last_general(s, a, true);
2916}
2917
/* CPY (scalar), merging: copy Xn/SP into active elements of Zd. */
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/* CPY (SIMD&FP scalar), merging: copy Vn[0] into active elements of Zd. */
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* REVB: byte-reverse within each element; no byte-element form. */
static gen_helper_gvec_3 * const revb_fns[4] = {
    NULL, gen_helper_sve_revb_h,
    gen_helper_sve_revb_s, gen_helper_sve_revb_d,
};
TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)

/* REVH: halfword-reverse; only word and doubleword elements exist. */
static gen_helper_gvec_3 * const revh_fns[4] = {
    NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
};
TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)

/* REVW: word-reverse; only the doubleword element size is valid. */
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
dae8fb90 2950
3a7be554 2951static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
b48ff240 2952{
2a753d1e
RH
2953 return gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
2954 a->rd, a->rn, a->rm, a->pg, a->esz);
b48ff240
RH
2955}
2956
75114792
SL
2957static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
2958{
2959 if (!dc_isar_feature(aa64_sve2, s)) {
2960 return false;
2961 }
2a753d1e
RH
2962 return gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
2963 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
75114792
SL
2964}
2965
757f9cff
RH
2966/*
2967 *** SVE Integer Compare - Vectors Group
2968 */
2969
/* Common expansion for predicated vector-vector compares: run the flags
 * helper and fold its result into NZCV via do_pred_flags.  A NULL
 * helper (invalid element size) rejects the encoding. */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    /* Fold the helper's flag result into NZCV. */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

/* Same-width compares: one helper per element size. */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/* Wide-element compares: no doubleword form (second operand is already
 * a doubleword), hence the NULL in the d slot. */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
3050
38cadeba
RH
3051/*
3052 *** SVE Integer Compare - Immediate Groups
3053 */
3054
/* Common expansion for predicated compare-with-immediate: the immediate
 * travels in the simd_desc data field; flags are set via do_pred_flags. */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    /* The immediate is passed to the helper as descriptor data. */
    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
3113
35da316f
RH
3114/*
3115 *** SVE Partition Break Group
3116 */
3117
3118static bool do_brk3(DisasContext *s, arg_rprr_s *a,
3119 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
3120{
3121 if (!sve_access_check(s)) {
3122 return true;
3123 }
3124
3125 unsigned vsz = pred_full_reg_size(s);
3126
3127 /* Predicate sizes may be smaller and cannot use simd_desc. */
3128 TCGv_ptr d = tcg_temp_new_ptr();
3129 TCGv_ptr n = tcg_temp_new_ptr();
3130 TCGv_ptr m = tcg_temp_new_ptr();
3131 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 3132 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3133
3134 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3135 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3136 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
3137 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3138
3139 if (a->s) {
93418f1c
RH
3140 TCGv_i32 t = tcg_temp_new_i32();
3141 fn_s(t, d, n, m, g, desc);
35da316f 3142 do_pred_flags(t);
93418f1c 3143 tcg_temp_free_i32(t);
35da316f 3144 } else {
93418f1c 3145 fn(d, n, m, g, desc);
35da316f
RH
3146 }
3147 tcg_temp_free_ptr(d);
3148 tcg_temp_free_ptr(n);
3149 tcg_temp_free_ptr(m);
3150 tcg_temp_free_ptr(g);
35da316f
RH
3151 return true;
3152}
3153
/*
 * Expand a 2-predicate-operand break insn (BRKA/BRKB/BRKN).  Same
 * structure as do_brk3 but without an rm operand; the flag-setting
 * variant fn_s is chosen when a->s is set.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    return true;
}
3186
/*
 * Per-insn wrappers: each pairs the non-flag-setting helper with its
 * flag-setting counterpart and delegates to do_brk3/do_brk2.
 */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3221
9ee3a611
RH
3222/*
3223 *** SVE Predicate Count Group
3224 */
3225
/*
 * Set VAL to the number of active elements of size ESZ in predicate PN,
 * gated by predicate PG.  Predicates of at most 64 bits are counted
 * inline with an AND + mask + ctpop; larger predicates go through the
 * sve_cntp helper.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));

        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}
3264
3a7be554 3265static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
3266{
3267 if (sve_access_check(s)) {
3268 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3269 }
3270 return true;
3271}
3272
3a7be554 3273static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3274{
3275 if (sve_access_check(s)) {
3276 TCGv_i64 reg = cpu_reg(s, a->rd);
3277 TCGv_i64 val = tcg_temp_new_i64();
3278
3279 do_cntp(s, val, a->esz, a->pg, a->pg);
3280 if (a->d) {
3281 tcg_gen_sub_i64(reg, reg, val);
3282 } else {
3283 tcg_gen_add_i64(reg, reg, val);
3284 }
3285 tcg_temp_free_i64(val);
3286 }
3287 return true;
3288}
3289
3a7be554 3290static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3291{
3292 if (a->esz == 0) {
3293 return false;
3294 }
3295 if (sve_access_check(s)) {
3296 unsigned vsz = vec_full_reg_size(s);
3297 TCGv_i64 val = tcg_temp_new_i64();
3298 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3299
3300 do_cntp(s, val, a->esz, a->pg, a->pg);
3301 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3302 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3303 }
3304 return true;
3305}
3306
3a7be554 3307static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3308{
3309 if (sve_access_check(s)) {
3310 TCGv_i64 reg = cpu_reg(s, a->rd);
3311 TCGv_i64 val = tcg_temp_new_i64();
3312
3313 do_cntp(s, val, a->esz, a->pg, a->pg);
3314 do_sat_addsub_32(reg, val, a->u, a->d);
3315 }
3316 return true;
3317}
3318
3a7be554 3319static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3320{
3321 if (sve_access_check(s)) {
3322 TCGv_i64 reg = cpu_reg(s, a->rd);
3323 TCGv_i64 val = tcg_temp_new_i64();
3324
3325 do_cntp(s, val, a->esz, a->pg, a->pg);
3326 do_sat_addsub_64(reg, val, a->u, a->d);
3327 }
3328 return true;
3329}
3330
3a7be554 3331static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3332{
3333 if (a->esz == 0) {
3334 return false;
3335 }
3336 if (sve_access_check(s)) {
3337 TCGv_i64 val = tcg_temp_new_i64();
3338 do_cntp(s, val, a->esz, a->pg, a->pg);
3339 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3340 }
3341 return true;
3342}
3343
caf1cefc
RH
3344/*
3345 *** SVE Integer Compare Scalars Group
3346 */
3347
/*
 * CTERMEQ/CTERMNE: compare-and-terminate.  N is set from the rn/rm
 * comparison, V is computed from the new N and the existing C (which
 * is only read, not written here): V = !N & !C.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* setcond yields 0/1; stash it in NF for now. */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3372
/*
 * WHILE{LT,LE,LO,LS} (SVE) and WHILE{GT,GE,HI,HS} (SVE2): construct a
 * predicate of the leading elements for which the scalar loop condition
 * holds.  The condition is reduced to an iteration count that is passed
 * to the sve_whilel/sve_whileg helpers.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit form: widen the operands per signedness. */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length. This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
3483
14f6dad1
RH
3484static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3485{
3486 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3487 TCGv_i32 t2;
14f6dad1
RH
3488 TCGv_ptr ptr;
3489 unsigned vsz = vec_full_reg_size(s);
3490 unsigned desc = 0;
3491
3492 if (!dc_isar_feature(aa64_sve2, s)) {
3493 return false;
3494 }
3495 if (!sve_access_check(s)) {
3496 return true;
3497 }
3498
3499 op0 = read_cpu_reg(s, a->rn, 1);
3500 op1 = read_cpu_reg(s, a->rm, 1);
3501
4481bbf2 3502 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3503 diff = tcg_temp_new_i64();
3504
3505 if (a->rw) {
3506 /* WHILERW */
3507 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3508 t1 = tcg_temp_new_i64();
3509 tcg_gen_sub_i64(diff, op0, op1);
3510 tcg_gen_sub_i64(t1, op1, op0);
3511 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3512 tcg_temp_free_i64(t1);
3513 /* Round down to a multiple of ESIZE. */
3514 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3515 /* If op1 == op0, diff == 0, and the condition is always true. */
3516 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3517 } else {
3518 /* WHILEWR */
3519 tcg_gen_sub_i64(diff, op1, op0);
3520 /* Round down to a multiple of ESIZE. */
3521 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3522 /* If op0 >= op1, diff <= 0, the condition is always true. */
3523 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3524 }
3525
3526 /* Bound to the maximum. */
3527 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3528
3529 /* Since we're bounded, pass as a 32-bit type. */
3530 t2 = tcg_temp_new_i32();
3531 tcg_gen_extrl_i64_i32(t2, diff);
3532 tcg_temp_free_i64(diff);
3533
3534 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3535 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3536
3537 ptr = tcg_temp_new_ptr();
3538 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3539
4481bbf2 3540 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3541 do_pred_flags(t2);
3542
3543 tcg_temp_free_ptr(ptr);
3544 tcg_temp_free_i32(t2);
14f6dad1
RH
3545 return true;
3546}
3547
ed491961
RH
3548/*
3549 *** SVE Integer Wide Immediate - Unpredicated Group
3550 */
3551
3a7be554 3552static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3553{
3554 if (a->esz == 0) {
3555 return false;
3556 }
3557 if (sve_access_check(s)) {
3558 unsigned vsz = vec_full_reg_size(s);
3559 int dofs = vec_full_reg_offset(s, a->rd);
3560 uint64_t imm;
3561
3562 /* Decode the VFP immediate. */
3563 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3564 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3565 }
3566 return true;
3567}
3568
3a7be554 3569static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3570{
3a7be554 3571 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
ed491961
RH
3572 return false;
3573 }
3574 if (sve_access_check(s)) {
3575 unsigned vsz = vec_full_reg_size(s);
3576 int dofs = vec_full_reg_offset(s, a->rd);
3577
8711e71f 3578 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3579 }
3580 return true;
3581}
3582
3a7be554 3583static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3584{
3a7be554 3585 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3586 return false;
3587 }
3588 if (sve_access_check(s)) {
3589 unsigned vsz = vec_full_reg_size(s);
3590 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3591 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3592 }
3593 return true;
3594}
3595
/*
 * SUB (immediate) is implemented as ADD of the negated immediate;
 * the decode struct is mutated in place before delegating.
 */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3601
/*
 * SUBR (immediate): reversed subtract, Zd = imm - Zn per element.
 * Expanded with a scalar-first GVecGen2s descriptor per element size,
 * falling back to the sve_subri_* out-of-line helpers.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* Shifted byte immediates are not valid. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}
3644
3a7be554 3645static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3646{
3647 if (sve_access_check(s)) {
3648 unsigned vsz = vec_full_reg_size(s);
3649 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3650 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3651 }
3652 return true;
3653}
3654
3a7be554 3655static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3656{
3a7be554 3657 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3658 return false;
3659 }
3660 if (sve_access_check(s)) {
138a1f7b
RH
3661 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3662 tcg_constant_i64(a->imm), u, d);
6e6a157d
RH
3663 }
3664 return true;
3665}
3666
/* Saturating immediate add/sub wrappers: (u = unsigned, d = subtract). */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}

static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}

static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}

static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
3686
3687static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3688{
3689 if (sve_access_check(s)) {
3690 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3691 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3692 vec_full_reg_offset(s, a->rn),
138a1f7b 3693 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
6e6a157d
RH
3694 }
3695 return true;
3696}
3697
/* Expand min/max-with-immediate for all four element sizes. */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
{ \
    static gen_helper_gvec_2i * const fns[4] = { \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
    }; \
    return do_zzi_ool(s, a, fns[a->esz]); \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3714
5f425b92
RH
/* Dot-product helpers indexed by [unsigned][element size]. */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)

/*
 * SVE Multiply - Indexed
 */

TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

/* Mixed-sign dot products require the I8MM extension. */
TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)
16fcfdc7 3739
/* Indexed (vector-by-element) binary ops; a->index goes into simd_data. */
#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX

/* Indexed long ops: bit 0 of simd_data selects TOP vs BOTTOM elements. */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB

/* Indexed multiply-accumulate, expanded via gen_gvec_ool_arg_zzxz. */
#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR

/* Indexed long multiply-accumulate; bit 0 of simd_data selects TOP/BOT. */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB

/* Indexed complex multiply-accumulate; low 2 bits of simd_data = rotation. */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
3850
ca40a6e6
RH
3851/*
3852 *** SVE Floating Point Multiply-Add Indexed Group
3853 */
3854
/*
 * FMLA/FMLS (indexed): bit 0 of simd_data carries the subtract flag,
 * the remaining bits the element index.  esz 1/2/3 map to the h/s/d
 * helpers (no byte-sized FP).
 */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses its own FP status / rounding environment. */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3876
0a82d963
RH
/* FMLA/FMLS differ only in the subtract flag passed to do_FMLA_zzxz. */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, false);
}

static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, true);
}
3886
ca40a6e6
RH
3887/*
3888 *** SVE Floating Point Multiply Indexed Group
3889 */
3890
/* FMUL (indexed): Zd = Zn * Zm[index], per element; esz 1/2/3 -> h/s/d. */
static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses its own FP status / rounding environment. */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3910
23fbe79f
RH
3911/*
3912 *** SVE Floating Point Fast Reduction Group
3913 */
3914
typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/*
 * Expand an FP horizontal reduction: run the helper over Zn under Pg
 * and write the scalar result to Vd.  simd_data carries the vector
 * size rounded up to a power of two, for the helper's tree reduction.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
3943
/* Expand an FP reduction for h/s/d element sizes (no byte FP format). */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
{ \
    static gen_helper_fp_reduce * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_reduce(s, a, fns[a->esz - 1]); \
    } \
    return true; \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
3966
3887c038
RH
3967/*
3968 *** SVE Floating Point Unary Operations - Unpredicated Group
3969 */
3970
3971static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3972{
3973 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3974 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
3975
3976 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3977 vec_full_reg_offset(s, a->rn),
3978 status, vsz, vsz, 0, fn);
3979 tcg_temp_free_ptr(status);
3980}
3981
3a7be554 3982static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3983{
3984 static gen_helper_gvec_2_ptr * const fns[3] = {
3985 gen_helper_gvec_frecpe_h,
3986 gen_helper_gvec_frecpe_s,
3987 gen_helper_gvec_frecpe_d,
3988 };
3989 if (a->esz == 0) {
3990 return false;
3991 }
3992 if (sve_access_check(s)) {
3993 do_zz_fp(s, a, fns[a->esz - 1]);
3994 }
3995 return true;
3996}
3997
3a7be554 3998static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3999{
4000 static gen_helper_gvec_2_ptr * const fns[3] = {
4001 gen_helper_gvec_frsqrte_h,
4002 gen_helper_gvec_frsqrte_s,
4003 gen_helper_gvec_frsqrte_d,
4004 };
4005 if (a->esz == 0) {
4006 return false;
4007 }
4008 if (sve_access_check(s)) {
4009 do_zz_fp(s, a, fns[a->esz - 1]);
4010 }
4011 return true;
4012}
4013
4d2e2a03
RH
4014/*
4015 *** SVE Floating Point Compare with Zero Group
4016 */
4017
4018static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4019 gen_helper_gvec_3_ptr *fn)
4020{
4021 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4022 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
4023
4024 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
4025 vec_full_reg_offset(s, a->rn),
4026 pred_full_reg_offset(s, a->pg),
4027 status, vsz, vsz, 0, fn);
4028 tcg_temp_free_ptr(status);
4029}
4030
/* Expand an FP compare-with-zero for h/s/d element sizes. */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
{ \
    static gen_helper_gvec_3_ptr * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_ppz_fp(s, a, fns[a->esz - 1]); \
    } \
    return true; \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
4056
67fcd9ad
RH
4057/*
4058 *** SVE floating-point trig multiply-add coefficient
4059 */
4060
3a7be554 4061static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
4062{
4063 static gen_helper_gvec_3_ptr * const fns[3] = {
4064 gen_helper_sve_ftmad_h,
4065 gen_helper_sve_ftmad_s,
4066 gen_helper_sve_ftmad_d,
4067 };
4068
4069 if (a->esz == 0) {
4070 return false;
4071 }
4072 if (sve_access_check(s)) {
4073 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4074 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
4075 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4076 vec_full_reg_offset(s, a->rn),
4077 vec_full_reg_offset(s, a->rm),
4078 status, vsz, vsz, a->imm, fns[a->esz - 1]);
4079 tcg_temp_free_ptr(status);
4080 }
4081 return true;
4082}
4083
7f9ddf64
RH
4084/*
4085 *** SVE Floating Point Accumulating Reduction Group
4086 */
4087
/*
 * FADDA: strictly-ordered FP accumulating reduction.  The initial
 * scalar is read from element 0 of Zn, accumulated across Zm under Pg
 * by the helper, and the scalar result written back to Vd.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {
        /* No byte-sized FP format. */
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
4127
29b80469
RH
4128/*
4129 *** SVE Floating Point Arithmetic - Unpredicated Group
4130 */
4131
4132static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
4133 gen_helper_gvec_3_ptr *fn)
4134{
4135 if (fn == NULL) {
4136 return false;
4137 }
4138 if (sve_access_check(s)) {
4139 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4140 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
4141 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4142 vec_full_reg_offset(s, a->rn),
4143 vec_full_reg_offset(s, a->rm),
4144 status, vsz, vsz, 0, fn);
4145 tcg_temp_free_ptr(status);
4146 }
4147 return true;
4148}
4149
4150
/*
 * Trans functions for the unpredicated FP binary ops; the NULL slot
 * rejects esz == 0 (bytes) via do_zzz_fp.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
{ \
    static gen_helper_gvec_3_ptr * const fns[4] = { \
        NULL, gen_helper_gvec_##name##_h, \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
    }; \
    return do_zzz_fp(s, a, fns[a->esz]); \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
4169
/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */

/*
 * Expand a predicated three-vector FP operation via an out-of-line
 * helper.  NULL fn rejects the (invalid) element size.
 */
static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                       gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4192
/* Trans functions for the predicated FP binary ops (Zd = op(Pg, Zn, Zm)). */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_zpzz_fp(s, a, fns[a->esz]); \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
8092c6a3 4216
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/*
 * Expand a predicated vector-by-scalar FP operation: Zd = op(Pg, Zn, scalar).
 * The scalar operand is passed to the helper by value, so the helper is
 * invoked directly rather than through a gvec expander.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
4243
/* Expand a vector-by-immediate FP op by materializing imm as a constant. */
static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
                      gen_helper_sve_fp2scalar *fn)
{
    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                 tcg_constant_i64(imm), fn);
}
4250
/*
 * Trans functions for FP ops with a one-bit immediate selecting one of
 * two constants (const0/const1), pre-encoded per element size in val[][].
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
{ \
    static gen_helper_sve_fp2scalar * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d \
    }; \
    static uint64_t const val[3][2] = { \
        { float16_##const0, float16_##const1 }, \
        { float32_##const0, float32_##const1 }, \
        { float64_##const0, float64_##const1 }, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
    } \
    return true; \
}

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
4283
abfdefd5
RH
4284static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4285 gen_helper_gvec_4_ptr *fn)
4286{
4287 if (fn == NULL) {
4288 return false;
4289 }
4290 if (sve_access_check(s)) {
4291 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4292 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4293 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4294 vec_full_reg_offset(s, a->rn),
4295 vec_full_reg_offset(s, a->rm),
4296 pred_full_reg_offset(s, a->pg),
4297 status, vsz, vsz, 0, fn);
4298 tcg_temp_free_ptr(status);
4299 }
4300 return true;
4301}
4302
/* Trans functions for the predicated FP vector-vector compares. */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_fp_cmp(s, a, fns[a->esz]); \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
4322
/*
 * FCADD: FP complex add with rotation.  The rotation bit is passed
 * to the helper in the simd_data field.
 */
static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_sve_fcadd_h,
        gen_helper_sve_fcadd_s,
        gen_helper_sve_fcadd_d
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4346
/*
 * Expand a predicated FP multiply-add: Zd = fn(Zn, Zm, Za, Pg).
 * Element size 0 (bytes) is invalid for FP and is rejected.
 */
static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
                    gen_helper_gvec_5_ptr *fn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4366
/* Trans functions for the four predicated FP multiply-add variants. */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
{ \
    static gen_helper_gvec_5_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_fmla(s, a, fns[a->esz]); \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4383
/*
 * FCMLA (vectors): predicated FP complex multiply-add.
 * The two-bit rotation is passed in the simd_data field.
 */
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    static gen_helper_gvec_5_ptr * const fns[4] = {
        NULL,
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4409
/*
 * FCMLA (indexed): unpredicated complex multiply-add by indexed element.
 * Only H and S element sizes are encodable (asserted by the decoder),
 * and rd must equal ra.  Index and rotation are packed into simd_data.
 */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_4_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz,
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4433
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */

/*
 * Expand a predicated two-operand FP op with explicit FP16 flag
 * (used by the conversion ops, where the flag depends on the source
 * rather than the destination element size).
 */
static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4452
/*
 * FP conversions.  Each trans function forwards to do_zpz_ptr with the
 * size-specific helper; the boolean selects the FP16 status block.
 * The helper-name suffix pairs source and destination sizes
 * (presumably b/h/s/d = byte/half/single/double; see sve_helper.c).
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

/* BFCVT requires the SVE BF16 extension. */
static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}

/* FP -> integer conversions; FP16 sources use the FP16 status block. */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
4560
/* Shared by FRINTI and the rounding-mode variants below. */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round using the current FPCR rounding mode. */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}

/* FRINTX: round to integral, raising Inexact (distinct helper set). */
static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frintx_h,
        gen_helper_sve_frintx_s,
        gen_helper_sve_frintx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
4588
/*
 * Expand an FRINT variant with an explicit rounding mode: temporarily
 * install MODE, emit the op, then restore the previous mode.
 * Note tmode must be a mutable temp (not tcg_constant_i32), because
 * gen_helper_set_rmode writes the old rounding mode back into it.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);

        /* Second call restores the saved rounding mode. */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4610
/* FRINT{N,P,M,Z,A}: fixed-rounding-mode variants via do_frint_mode. */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
}

static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
}

static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
}

static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
}

static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
}
4650
/* FRECPX: FP reciprocal exponent, per element size. */
static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frecpx_h,
        gen_helper_sve_frecpx_s,
        gen_helper_sve_frecpx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}

/* FSQRT: predicated FP square root, per element size. */
static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_fsqrt_h,
        gen_helper_sve_fsqrt_s,
        gen_helper_sve_fsqrt_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
4676
/*
 * Integer -> FP conversions (signed SCVTF, unsigned UCVTF).
 * Conversions producing half-precision use the FP16 status block.
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
4746
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte loads, plus one for any sub-8-byte tail. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small register: unroll the 8-byte loads. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large register: emit a run-time loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32 of the remainder yields the MO_SIZE for one load. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte load plus a 2-byte load, merged. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
4841
/* Similarly for stores. */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte stores, plus one for any sub-8-byte tail. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities. There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small register: unroll the 8-byte stores. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large register: emit a run-time loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32 of the remainder yields the MO_SIZE for one store. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte store then a 2-byte store of the rest. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
4926
/* LDR (vector): load a full Z register from Rn + imm * reg_size. */
static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* LDR (predicate): load a full P register. */
static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* STR (vector): store a full Z register. */
static bool trans_STR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* STR (predicate): store a full P register. */
static bool trans_STR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}
4966
/*
 *** SVE Memory - Contiguous Load Group
 */

/* The memory mode of the dtype. */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

/* Extract just the access size from the dtype's MemOp. */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype. */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4988
/*
 * Expand a predicated contiguous load/store through helper FN.
 * MTE_N is the number of registers transferred (for the MTE access
 * size); IS_WRITE distinguishes stores for the MTE descriptor.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        /* Pack the MTE check parameters above SVE_MTEDESC_SHIFT. */
        int msz = dtype_msz(dtype);

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        /* No MTE: just strip the tag bits from the address here. */
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_pg);
}
5023
/*
 * Contiguous predicated load helpers.
 * Indexed by [mte][be][dtype][nreg]:
 *   mte   - MTE checks active (s->mte_active[0])
 *   be    - big-endian data access (s->be_data == MO_BE)
 *   dtype - the 4-bit dtype field from the instruction encoding
 *   nreg  - number of registers minus 1 (0 for LD1 .. 3 for LD4)
 * NULL entries are dtype/nreg combinations with no valid encoding;
 * do_ld_zpa asserts that they are never selected.
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
5142
c4e7c493
RH
5143static void do_ld_zpa(DisasContext *s, int zt, int pg,
5144 TCGv_i64 addr, int dtype, int nreg)
5145{
206adacf 5146 gen_helper_gvec_mem *fn
c182c6db 5147 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 5148
206adacf
RH
5149 /*
5150 * While there are holes in the table, they are not
c4e7c493
RH
5151 * accessible via the instruction encoding.
5152 */
5153 assert(fn != NULL);
206adacf 5154 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
5155}
5156
3a7be554 5157static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
5158{
5159 if (a->rm == 31) {
5160 return false;
5161 }
5162 if (sve_access_check(s)) {
5163 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5164 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
5165 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5166 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5167 }
5168 return true;
5169}
5170
3a7be554 5171static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
5172{
5173 if (sve_access_check(s)) {
5174 int vsz = vec_full_reg_size(s);
5175 int elements = vsz >> dtype_esz[a->dtype];
5176 TCGv_i64 addr = new_tmp_a64(s);
5177
5178 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5179 (a->imm * elements * (a->nreg + 1))
5180 << dtype_msz(a->dtype));
5181 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5182 }
5183 return true;
5184}
e2654d75 5185
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    /*
     * First-fault load helpers, one per dtype.
     * Indexed by [mte][be][dtype]; first-fault loads are always
     * single-register, hence no nreg dimension.
     */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        /* address = Xn|SP + (Xm << msz) */
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
5283
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    /*
     * Non-fault load helpers, one per dtype.
     * Indexed by [mte][be][dtype], as for trans_LDFF1_zprr above.
     */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        /* The immediate is scaled by the vector transfer size. */
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
1a039c7e 5384
/*
 * Common expansion for LD1RQ: load one 16-byte quadword governed by
 * the low 16 bits of predicate pg, then replicate that quadword
 * across the whole destination vector.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers. */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /*
         * On a big-endian host the low 16 predicate bits sit at byte
         * offset 6 within the first uint64_t of the predicate.
         */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        /* Stash the 16-bit predicate in the scratch slot and use that. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* Invoke the LD1 helper with the vector length forced to 16 bytes. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    tcg_temp_free_ptr(t_pg);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}
5426
3a7be554 5427static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5428{
5429 if (a->rm == 31) {
5430 return false;
5431 }
5432 if (sve_access_check(s)) {
5433 int msz = dtype_msz(a->dtype);
5434 TCGv_i64 addr = new_tmp_a64(s);
5435 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5436 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5437 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5438 }
5439 return true;
5440}
5441
3a7be554 5442static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5443{
5444 if (sve_access_check(s)) {
5445 TCGv_i64 addr = new_tmp_a64(s);
5446 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5447 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5448 }
5449 return true;
5450}
5451
/*
 * Common expansion for LD1RO: load one 32-byte octaword governed by
 * the low 32 bits of predicate pg, then replicate it across the
 * destination vector in 32-byte units, zeroing any remainder.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers. */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /*
         * On a big-endian host the low 32 predicate bits sit at byte
         * offset 4 within the first uint64_t of the predicate.
         */
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        /* Stash the 32-bit predicate in the scratch slot and use that. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* Invoke the LD1 helper with the vector length forced to 32 bytes. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}
5514
5515static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5516{
5517 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5518 return false;
5519 }
5520 if (a->rm == 31) {
5521 return false;
5522 }
5523 if (sve_access_check(s)) {
5524 TCGv_i64 addr = new_tmp_a64(s);
5525 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5526 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5527 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5528 }
5529 return true;
5530}
5531
5532static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5533{
5534 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5535 return false;
5536 }
5537 if (sve_access_check(s)) {
5538 TCGv_i64 addr = new_tmp_a64(s);
5539 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5540 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5541 }
5542 return true;
5543}
5544
/*
 * Load and broadcast element (LD1R): load a single element and
 * replicate it to every active element of the destination, zeroing
 * the inactive elements.  If no predicate bit is set, the load is
 * skipped entirely (branch over it to the "over" label).
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Predicate wider than 64 bits: search for any active element. */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    /* Perform the MTE check and obtain the address to actually use. */
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
5596
/*
 * Expand a contiguous predicated store.
 * nreg == 0 selects ST1 via fn_single[mte][be][msz][esz];
 * nreg == 1..3 selects ST2..ST4 via fn_multiple[mte][be][nreg - 1][msz]
 * (for the multi-register forms msz == esz, enforced by the encoding).
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* ST1 helpers, indexed by [mte][be][msz][esz].
     * NULL entries (msz > esz) have no valid encoding. */
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    /* ST2/ST3/ST4 helpers, indexed by [mte][be][nreg - 1][msz]. */
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
5720
3a7be554 5721static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5722{
5723 if (a->rm == 31 || a->msz > a->esz) {
5724 return false;
5725 }
5726 if (sve_access_check(s)) {
5727 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5728 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5729 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5730 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5731 }
5732 return true;
5733}
5734
3a7be554 5735static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5736{
5737 if (a->msz > a->esz) {
5738 return false;
5739 }
5740 if (sve_access_check(s)) {
5741 int vsz = vec_full_reg_size(s);
5742 int elements = vsz >> a->esz;
5743 TCGv_i64 addr = new_tmp_a64(s);
5744
5745 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5746 (a->imm * elements * (a->nreg + 1)) << a->msz);
5747 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5748 }
5749 return true;
5750}
f6dbf62a
RH
5751
5752/*
5753 *** SVE gather loads / scatter stores
5754 */
5755
/*
 * Emit a call to a gather-load / scatter-store helper.
 *   zt       - data vector register
 *   pg       - governing predicate register
 *   zm       - vector register holding the offsets/addresses
 *   scale    - offset scaling, folded into the descriptor for the helper
 *   scalar   - scalar operand (base address) passed through to the helper
 *   msz      - log2 of the memory element size, for the MTE descriptor
 *   is_write - true for scatter stores, recorded in the MTE descriptor
 *   fn       - the helper to invoke
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    int desc = 0;

    if (s->mte_active[0]) {
        /* Pack the MTE metadata above the simd_desc data bits. */
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    }
    desc = simd_desc(vsz, vsz, desc | scale);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
}
5785
/*
 * Gather-load helpers for 32-bit elements.
 * Indexed by [mte][be][ff][xs][u][msz]:
 *   mte - MTE checks active
 *   be  - big-endian data access
 *   ff  - first-fault variant
 *   xs  - 0 selects the _zsu (zero-extended offset) helpers,
 *         1 selects the _zss (sign-extended offset) helpers
 *   u   - 0 sign-extends the loaded value, 1 zero-extends
 *         (per the s/u in the helper names)
 *   msz - log2 of the memory element size
 * NULL entries have no valid encoding.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
5902
5903/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5904static gen_helper_gvec_mem_scatter * const
5905gather_load_fn64[2][2][2][3][2][4] = {
5906 { /* MTE Inactive */
5907 { /* Little-endian */
5908 { { { gen_helper_sve_ldbds_zsu,
5909 gen_helper_sve_ldhds_le_zsu,
5910 gen_helper_sve_ldsds_le_zsu,
5911 NULL, },
5912 { gen_helper_sve_ldbdu_zsu,
5913 gen_helper_sve_ldhdu_le_zsu,
5914 gen_helper_sve_ldsdu_le_zsu,
5915 gen_helper_sve_lddd_le_zsu, } },
5916 { { gen_helper_sve_ldbds_zss,
5917 gen_helper_sve_ldhds_le_zss,
5918 gen_helper_sve_ldsds_le_zss,
5919 NULL, },
5920 { gen_helper_sve_ldbdu_zss,
5921 gen_helper_sve_ldhdu_le_zss,
5922 gen_helper_sve_ldsdu_le_zss,
5923 gen_helper_sve_lddd_le_zss, } },
5924 { { gen_helper_sve_ldbds_zd,
5925 gen_helper_sve_ldhds_le_zd,
5926 gen_helper_sve_ldsds_le_zd,
5927 NULL, },
5928 { gen_helper_sve_ldbdu_zd,
5929 gen_helper_sve_ldhdu_le_zd,
5930 gen_helper_sve_ldsdu_le_zd,
5931 gen_helper_sve_lddd_le_zd, } } },
5932
5933 /* First-fault */
5934 { { { gen_helper_sve_ldffbds_zsu,
5935 gen_helper_sve_ldffhds_le_zsu,
5936 gen_helper_sve_ldffsds_le_zsu,
5937 NULL, },
5938 { gen_helper_sve_ldffbdu_zsu,
5939 gen_helper_sve_ldffhdu_le_zsu,
5940 gen_helper_sve_ldffsdu_le_zsu,
5941 gen_helper_sve_ldffdd_le_zsu, } },
5942 { { gen_helper_sve_ldffbds_zss,
5943 gen_helper_sve_ldffhds_le_zss,
5944 gen_helper_sve_ldffsds_le_zss,
5945 NULL, },
5946 { gen_helper_sve_ldffbdu_zss,
5947 gen_helper_sve_ldffhdu_le_zss,
5948 gen_helper_sve_ldffsdu_le_zss,
5949 gen_helper_sve_ldffdd_le_zss, } },
5950 { { gen_helper_sve_ldffbds_zd,
5951 gen_helper_sve_ldffhds_le_zd,
5952 gen_helper_sve_ldffsds_le_zd,
5953 NULL, },
5954 { gen_helper_sve_ldffbdu_zd,
5955 gen_helper_sve_ldffhdu_le_zd,
5956 gen_helper_sve_ldffsdu_le_zd,
5957 gen_helper_sve_ldffdd_le_zd, } } } },
5958 { /* Big-endian */
5959 { { { gen_helper_sve_ldbds_zsu,
5960 gen_helper_sve_ldhds_be_zsu,
5961 gen_helper_sve_ldsds_be_zsu,
5962 NULL, },
5963 { gen_helper_sve_ldbdu_zsu,
5964 gen_helper_sve_ldhdu_be_zsu,
5965 gen_helper_sve_ldsdu_be_zsu,
5966 gen_helper_sve_lddd_be_zsu, } },
5967 { { gen_helper_sve_ldbds_zss,
5968 gen_helper_sve_ldhds_be_zss,
5969 gen_helper_sve_ldsds_be_zss,
5970 NULL, },
5971 { gen_helper_sve_ldbdu_zss,
5972 gen_helper_sve_ldhdu_be_zss,
5973 gen_helper_sve_ldsdu_be_zss,
5974 gen_helper_sve_lddd_be_zss, } },
5975 { { gen_helper_sve_ldbds_zd,
5976 gen_helper_sve_ldhds_be_zd,
5977 gen_helper_sve_ldsds_be_zd,
5978 NULL, },
5979 { gen_helper_sve_ldbdu_zd,
5980 gen_helper_sve_ldhdu_be_zd,
5981 gen_helper_sve_ldsdu_be_zd,
5982 gen_helper_sve_lddd_be_zd, } } },
5983
5984 /* First-fault */
5985 { { { gen_helper_sve_ldffbds_zsu,
5986 gen_helper_sve_ldffhds_be_zsu,
5987 gen_helper_sve_ldffsds_be_zsu,
5988 NULL, },
5989 { gen_helper_sve_ldffbdu_zsu,
5990 gen_helper_sve_ldffhdu_be_zsu,
5991 gen_helper_sve_ldffsdu_be_zsu,
5992 gen_helper_sve_ldffdd_be_zsu, } },
5993 { { gen_helper_sve_ldffbds_zss,
5994 gen_helper_sve_ldffhds_be_zss,
5995 gen_helper_sve_ldffsds_be_zss,
5996 NULL, },
5997 { gen_helper_sve_ldffbdu_zss,
5998 gen_helper_sve_ldffhdu_be_zss,
5999 gen_helper_sve_ldffsdu_be_zss,
6000 gen_helper_sve_ldffdd_be_zss, } },
6001 { { gen_helper_sve_ldffbds_zd,
6002 gen_helper_sve_ldffhds_be_zd,
6003 gen_helper_sve_ldffsds_be_zd,
6004 NULL, },
6005 { gen_helper_sve_ldffbdu_zd,
6006 gen_helper_sve_ldffhdu_be_zd,
6007 gen_helper_sve_ldffsdu_be_zd,
6008 gen_helper_sve_ldffdd_be_zd, } } } } },
6009 { /* MTE Active */
6010 { /* Little-endian */
6011 { { { gen_helper_sve_ldbds_zsu_mte,
6012 gen_helper_sve_ldhds_le_zsu_mte,
6013 gen_helper_sve_ldsds_le_zsu_mte,
6014 NULL, },
6015 { gen_helper_sve_ldbdu_zsu_mte,
6016 gen_helper_sve_ldhdu_le_zsu_mte,
6017 gen_helper_sve_ldsdu_le_zsu_mte,
6018 gen_helper_sve_lddd_le_zsu_mte, } },
6019 { { gen_helper_sve_ldbds_zss_mte,
6020 gen_helper_sve_ldhds_le_zss_mte,
6021 gen_helper_sve_ldsds_le_zss_mte,
6022 NULL, },
6023 { gen_helper_sve_ldbdu_zss_mte,
6024 gen_helper_sve_ldhdu_le_zss_mte,
6025 gen_helper_sve_ldsdu_le_zss_mte,
6026 gen_helper_sve_lddd_le_zss_mte, } },
6027 { { gen_helper_sve_ldbds_zd_mte,
6028 gen_helper_sve_ldhds_le_zd_mte,
6029 gen_helper_sve_ldsds_le_zd_mte,
6030 NULL, },
6031 { gen_helper_sve_ldbdu_zd_mte,
6032 gen_helper_sve_ldhdu_le_zd_mte,
6033 gen_helper_sve_ldsdu_le_zd_mte,
6034 gen_helper_sve_lddd_le_zd_mte, } } },
6035
6036 /* First-fault */
6037 { { { gen_helper_sve_ldffbds_zsu_mte,
6038 gen_helper_sve_ldffhds_le_zsu_mte,
6039 gen_helper_sve_ldffsds_le_zsu_mte,
6040 NULL, },
6041 { gen_helper_sve_ldffbdu_zsu_mte,
6042 gen_helper_sve_ldffhdu_le_zsu_mte,
6043 gen_helper_sve_ldffsdu_le_zsu_mte,
6044 gen_helper_sve_ldffdd_le_zsu_mte, } },
6045 { { gen_helper_sve_ldffbds_zss_mte,
6046 gen_helper_sve_ldffhds_le_zss_mte,
6047 gen_helper_sve_ldffsds_le_zss_mte,
6048 NULL, },
6049 { gen_helper_sve_ldffbdu_zss_mte,
6050 gen_helper_sve_ldffhdu_le_zss_mte,
6051 gen_helper_sve_ldffsdu_le_zss_mte,
6052 gen_helper_sve_ldffdd_le_zss_mte, } },
6053 { { gen_helper_sve_ldffbds_zd_mte,
6054 gen_helper_sve_ldffhds_le_zd_mte,
6055 gen_helper_sve_ldffsds_le_zd_mte,
6056 NULL, },
6057 { gen_helper_sve_ldffbdu_zd_mte,
6058 gen_helper_sve_ldffhdu_le_zd_mte,
6059 gen_helper_sve_ldffsdu_le_zd_mte,
6060 gen_helper_sve_ldffdd_le_zd_mte, } } } },
6061 { /* Big-endian */
6062 { { { gen_helper_sve_ldbds_zsu_mte,
6063 gen_helper_sve_ldhds_be_zsu_mte,
6064 gen_helper_sve_ldsds_be_zsu_mte,
6065 NULL, },
6066 { gen_helper_sve_ldbdu_zsu_mte,
6067 gen_helper_sve_ldhdu_be_zsu_mte,
6068 gen_helper_sve_ldsdu_be_zsu_mte,
6069 gen_helper_sve_lddd_be_zsu_mte, } },
6070 { { gen_helper_sve_ldbds_zss_mte,
6071 gen_helper_sve_ldhds_be_zss_mte,
6072 gen_helper_sve_ldsds_be_zss_mte,
6073 NULL, },
6074 { gen_helper_sve_ldbdu_zss_mte,
6075 gen_helper_sve_ldhdu_be_zss_mte,
6076 gen_helper_sve_ldsdu_be_zss_mte,
6077 gen_helper_sve_lddd_be_zss_mte, } },
6078 { { gen_helper_sve_ldbds_zd_mte,
6079 gen_helper_sve_ldhds_be_zd_mte,
6080 gen_helper_sve_ldsds_be_zd_mte,
6081 NULL, },
6082 { gen_helper_sve_ldbdu_zd_mte,
6083 gen_helper_sve_ldhdu_be_zd_mte,
6084 gen_helper_sve_ldsdu_be_zd_mte,
6085 gen_helper_sve_lddd_be_zd_mte, } } },
6086
6087 /* First-fault */
6088 { { { gen_helper_sve_ldffbds_zsu_mte,
6089 gen_helper_sve_ldffhds_be_zsu_mte,
6090 gen_helper_sve_ldffsds_be_zsu_mte,
6091 NULL, },
6092 { gen_helper_sve_ldffbdu_zsu_mte,
6093 gen_helper_sve_ldffhdu_be_zsu_mte,
6094 gen_helper_sve_ldffsdu_be_zsu_mte,
6095 gen_helper_sve_ldffdd_be_zsu_mte, } },
6096 { { gen_helper_sve_ldffbds_zss_mte,
6097 gen_helper_sve_ldffhds_be_zss_mte,
6098 gen_helper_sve_ldffsds_be_zss_mte,
6099 NULL, },
6100 { gen_helper_sve_ldffbdu_zss_mte,
6101 gen_helper_sve_ldffhdu_be_zss_mte,
6102 gen_helper_sve_ldffsdu_be_zss_mte,
6103 gen_helper_sve_ldffdd_be_zss_mte, } },
6104 { { gen_helper_sve_ldffbds_zd_mte,
6105 gen_helper_sve_ldffhds_be_zd_mte,
6106 gen_helper_sve_ldffsds_be_zd_mte,
6107 NULL, },
6108 { gen_helper_sve_ldffbdu_zd_mte,
6109 gen_helper_sve_ldffhdu_be_zd_mte,
6110 gen_helper_sve_ldffsdu_be_zd_mte,
6111 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
6112};
6113
3a7be554 6114static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
6115{
6116 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6117 bool be = s->be_data == MO_BE;
6118 bool mte = s->mte_active[0];
673e9fa6
RH
6119
6120 if (!sve_access_check(s)) {
6121 return true;
6122 }
6123
6124 switch (a->esz) {
6125 case MO_32:
d28d12f0 6126 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6127 break;
6128 case MO_64:
d28d12f0 6129 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6130 break;
6131 }
6132 assert(fn != NULL);
6133
6134 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6135 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
6136 return true;
6137}
6138
3a7be554 6139static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
6140{
6141 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6142 bool be = s->be_data == MO_BE;
6143 bool mte = s->mte_active[0];
673e9fa6
RH
6144
6145 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
6146 return false;
6147 }
6148 if (!sve_access_check(s)) {
6149 return true;
6150 }
6151
6152 switch (a->esz) {
6153 case MO_32:
d28d12f0 6154 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
6155 break;
6156 case MO_64:
d28d12f0 6157 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
6158 break;
6159 }
6160 assert(fn != NULL);
6161
6162 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
6163 * by loading the immediate into the scalar parameter.
6164 */
2ccdf94f
RH
6165 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6166 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
6167 return true;
6168}
6169
cf327449
SL
6170static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
6171{
b17ab470
RH
6172 gen_helper_gvec_mem_scatter *fn = NULL;
6173 bool be = s->be_data == MO_BE;
6174 bool mte = s->mte_active[0];
6175
6176 if (a->esz < a->msz + !a->u) {
6177 return false;
6178 }
cf327449
SL
6179 if (!dc_isar_feature(aa64_sve2, s)) {
6180 return false;
6181 }
b17ab470
RH
6182 if (!sve_access_check(s)) {
6183 return true;
6184 }
6185
6186 switch (a->esz) {
6187 case MO_32:
6188 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
6189 break;
6190 case MO_64:
6191 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
6192 break;
6193 }
6194 assert(fn != NULL);
6195
6196 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6197 cpu_reg(s, a->rm), a->msz, false, fn);
6198 return true;
cf327449
SL
6199}
6200
d28d12f0
RH
/* Indexed by [mte][be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            /* Byte stores have no endian variant, hence the shared entry. */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
6234
/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
6292
3a7be554 6293static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 6294{
f6dbf62a 6295 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
6296 bool be = s->be_data == MO_BE;
6297 bool mte = s->mte_active[0];
f6dbf62a
RH
6298
6299 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6300 return false;
6301 }
6302 if (!sve_access_check(s)) {
6303 return true;
6304 }
6305 switch (a->esz) {
6306 case MO_32:
d28d12f0 6307 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
6308 break;
6309 case MO_64:
d28d12f0 6310 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
6311 break;
6312 default:
6313 g_assert_not_reached();
6314 }
6315 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6316 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
6317 return true;
6318}
dec6cf6b 6319
3a7be554 6320static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
6321{
6322 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6323 bool be = s->be_data == MO_BE;
6324 bool mte = s->mte_active[0];
408ecde9
RH
6325
6326 if (a->esz < a->msz) {
6327 return false;
6328 }
6329 if (!sve_access_check(s)) {
6330 return true;
6331 }
6332
6333 switch (a->esz) {
6334 case MO_32:
d28d12f0 6335 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
6336 break;
6337 case MO_64:
d28d12f0 6338 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
6339 break;
6340 }
6341 assert(fn != NULL);
6342
6343 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6344 * by loading the immediate into the scalar parameter.
6345 */
2ccdf94f
RH
6346 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6347 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
6348 return true;
6349}
6350
6ebca45f
SL
6351static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
6352{
b17ab470
RH
6353 gen_helper_gvec_mem_scatter *fn;
6354 bool be = s->be_data == MO_BE;
6355 bool mte = s->mte_active[0];
6356
6357 if (a->esz < a->msz) {
6358 return false;
6359 }
6ebca45f
SL
6360 if (!dc_isar_feature(aa64_sve2, s)) {
6361 return false;
6362 }
b17ab470
RH
6363 if (!sve_access_check(s)) {
6364 return true;
6365 }
6366
6367 switch (a->esz) {
6368 case MO_32:
6369 fn = scatter_store_fn32[mte][be][0][a->msz];
6370 break;
6371 case MO_64:
6372 fn = scatter_store_fn64[mte][be][2][a->msz];
6373 break;
6374 default:
6375 g_assert_not_reached();
6376 }
6377
6378 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6379 cpu_reg(s, a->rm), a->msz, true, fn);
6380 return true;
6ebca45f
SL
6381}
6382
dec6cf6b
RH
6383/*
6384 * Prefetches
6385 */
6386
/*
 * PRF: SVE prefetch.  QEMU does not model caches, so the instruction is
 * a nop; the SVE access check must still run so that any trap for
 * disabled SVE is taken.
 */
static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}
6393
3a7be554 6394static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
6395{
6396 if (a->rm == 31) {
6397 return false;
6398 }
6399 /* Prefetch is a nop within QEMU. */
2f95a3b0 6400 (void)sve_access_check(s);
dec6cf6b
RH
6401 return true;
6402}
a2103582
RH
6403
6404/*
6405 * Move Prefix
6406 *
6407 * TODO: The implementation so far could handle predicated merging movprfx.
6408 * The helper functions as written take an extra source register to
6409 * use in the operation, but the result is only written when predication
6410 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
6411 * to allow the final write back to the destination to be unconditional.
6412 * For predicated zeroing movprfx, we need to rearrange the helpers to
6413 * allow the final write back to zero inactives.
6414 *
6415 * In the meantime, just emit the moves.
6416 */
6417
/* MOVPRFX (unpredicated): implemented as a plain vector move. */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}
6422
/*
 * MOVPRFX (predicated), merging form: select active elements from Zn,
 * keeping inactive elements of Zd unchanged.
 */
static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
    }
    return true;
}
6430
/* MOVPRFX (predicated), zeroing form: inactive elements become zero. */
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
5dad1ba5
RH
6435
6436/*
6437 * SVE2 Integer Multiply - Unpredicated
6438 */
6439
6440static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
6441{
6442 if (!dc_isar_feature(aa64_sve2, s)) {
6443 return false;
6444 }
6445 if (sve_access_check(s)) {
6446 gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
6447 }
6448 return true;
6449}
6450
bd394cf5
RH
6451static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
6452 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
6453 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
6454};
6455TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6456 smulh_zzz_fns[a->esz], a, 0)
5dad1ba5 6457
bd394cf5
RH
6458static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
6459 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
6460 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
6461};
6462TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6463 umulh_zzz_fns[a->esz], a, 0)
5dad1ba5 6464
bd394cf5
RH
6465TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6466 gen_helper_gvec_pmul_b, a, 0)
5dad1ba5 6467
bd394cf5
RH
6468static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
6469 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
6470 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
6471};
6472TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6473 sqdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6474
bd394cf5
RH
6475static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
6476 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
6477 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
6478};
6479TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6480 sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6481
d4b1e59d
RH
6482/*
6483 * SVE2 Integer - Predicated
6484 */
6485
/*
 * Expand an SVE2 predicated two-source operation via an out-of-line
 * helper, after verifying that SVE2 is implemented.
 */
static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
                             gen_helper_gvec_4 *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzz_ool(s, a, fn);
}
6494
6495static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6496{
6497 static gen_helper_gvec_4 * const fns[3] = {
6498 gen_helper_sve2_sadalp_zpzz_h,
6499 gen_helper_sve2_sadalp_zpzz_s,
6500 gen_helper_sve2_sadalp_zpzz_d,
6501 };
6502 if (a->esz == 0) {
6503 return false;
6504 }
6505 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6506}
6507
6508static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6509{
6510 static gen_helper_gvec_4 * const fns[3] = {
6511 gen_helper_sve2_uadalp_zpzz_h,
6512 gen_helper_sve2_uadalp_zpzz_s,
6513 gen_helper_sve2_uadalp_zpzz_d,
6514 };
6515 if (a->esz == 0) {
6516 return false;
6517 }
6518 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6519}
db366da8
RH
6520
6521/*
6522 * SVE2 integer unary operations (predicated)
6523 */
6524
b2c00961
RH
6525TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
6526 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)
db366da8 6527
b2c00961
RH
6528TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
6529 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
db366da8 6530
b2c00961
RH
6531static gen_helper_gvec_3 * const sqabs_fns[4] = {
6532 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
6533 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
6534};
6535TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)
db366da8 6536
b2c00961
RH
6537static gen_helper_gvec_3 * const sqneg_fns[4] = {
6538 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
6539 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
6540};
6541TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
45d9503d
RH
6542
/*
 * Emit a trans_* function for an SVE2 predicated two-source operation
 * that exists for all four element sizes, dispatching through
 * do_sve2_zpzz_ool.
 */
#define DO_SVE2_ZPZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4 * const fns[4] = { \
        gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
    }; \
    return do_sve2_zpzz_ool(s, a, fns[a->esz]); \
}
6552
6553DO_SVE2_ZPZZ(SQSHL, sqshl)
6554DO_SVE2_ZPZZ(SQRSHL, sqrshl)
6555DO_SVE2_ZPZZ(SRSHL, srshl)
6556
6557DO_SVE2_ZPZZ(UQSHL, uqshl)
6558DO_SVE2_ZPZZ(UQRSHL, uqrshl)
6559DO_SVE2_ZPZZ(URSHL, urshl)
a47dc220
RH
6560
6561DO_SVE2_ZPZZ(SHADD, shadd)
6562DO_SVE2_ZPZZ(SRHADD, srhadd)
6563DO_SVE2_ZPZZ(SHSUB, shsub)
6564
6565DO_SVE2_ZPZZ(UHADD, uhadd)
6566DO_SVE2_ZPZZ(URHADD, urhadd)
6567DO_SVE2_ZPZZ(UHSUB, uhsub)
8597dc8b
RH
6568
6569DO_SVE2_ZPZZ(ADDP, addp)
6570DO_SVE2_ZPZZ(SMAXP, smaxp)
6571DO_SVE2_ZPZZ(UMAXP, umaxp)
6572DO_SVE2_ZPZZ(SMINP, sminp)
6573DO_SVE2_ZPZZ(UMINP, uminp)
4f07fbeb
RH
6574
6575DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
6576DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
6577DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
6578DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
6579DO_SVE2_ZPZZ(SUQADD, suqadd)
6580DO_SVE2_ZPZZ(USQADD, usqadd)
0ce1dda8
RH
6581
6582/*
6583 * SVE2 Widening Integer Arithmetic
6584 */
6585
615f19fe
RH
6586static gen_helper_gvec_3 * const saddl_fns[4] = {
6587 NULL, gen_helper_sve2_saddl_h,
6588 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
6589};
6590TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6591 saddl_fns[a->esz], a, 0)
6592TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6593 saddl_fns[a->esz], a, 3)
6594TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
6595 saddl_fns[a->esz], a, 2)
6596
6597static gen_helper_gvec_3 * const ssubl_fns[4] = {
6598 NULL, gen_helper_sve2_ssubl_h,
6599 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
6600};
6601TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6602 ssubl_fns[a->esz], a, 0)
6603TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6604 ssubl_fns[a->esz], a, 3)
6605TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
6606 ssubl_fns[a->esz], a, 2)
6607TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
6608 ssubl_fns[a->esz], a, 1)
6609
6610static gen_helper_gvec_3 * const sabdl_fns[4] = {
6611 NULL, gen_helper_sve2_sabdl_h,
6612 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
6613};
6614TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6615 sabdl_fns[a->esz], a, 0)
6616TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6617 sabdl_fns[a->esz], a, 3)
6618
6619static gen_helper_gvec_3 * const uaddl_fns[4] = {
6620 NULL, gen_helper_sve2_uaddl_h,
6621 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
6622};
6623TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6624 uaddl_fns[a->esz], a, 0)
6625TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6626 uaddl_fns[a->esz], a, 3)
6627
6628static gen_helper_gvec_3 * const usubl_fns[4] = {
6629 NULL, gen_helper_sve2_usubl_h,
6630 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
6631};
6632TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6633 usubl_fns[a->esz], a, 0)
6634TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6635 usubl_fns[a->esz], a, 3)
6636
6637static gen_helper_gvec_3 * const uabdl_fns[4] = {
6638 NULL, gen_helper_sve2_uabdl_h,
6639 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
6640};
6641TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6642 uabdl_fns[a->esz], a, 0)
6643TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6644 uabdl_fns[a->esz], a, 3)
6645
6646static gen_helper_gvec_3 * const sqdmull_fns[4] = {
6647 NULL, gen_helper_sve2_sqdmull_zzz_h,
6648 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
6649};
6650TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6651 sqdmull_fns[a->esz], a, 0)
6652TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6653 sqdmull_fns[a->esz], a, 3)
6654
6655static gen_helper_gvec_3 * const smull_fns[4] = {
6656 NULL, gen_helper_sve2_smull_zzz_h,
6657 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
6658};
6659TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6660 smull_fns[a->esz], a, 0)
6661TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6662 smull_fns[a->esz], a, 3)
6663
6664static gen_helper_gvec_3 * const umull_fns[4] = {
6665 NULL, gen_helper_sve2_umull_zzz_h,
6666 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
6667};
6668TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6669 umull_fns[a->esz], a, 0)
6670TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6671 umull_fns[a->esz], a, 3)
6672
6673static gen_helper_gvec_3 * const eoril_fns[4] = {
6674 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6675 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6676};
6677TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
6678TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
2df3ca55 6679
e3a56131
RH
/*
 * PMULLB/PMULLT: polynomial multiply long, bottom/top.
 * The byte form (esz == 0, producing 128-bit results) additionally
 * requires the pmull128 feature.  fns[2] is NULL as there is no
 * 32-bit-element form; presumably gen_gvec_ool_arg_zzz rejects a NULL
 * helper -- TODO confirm at its definition.
 */
static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL, gen_helper_sve2_pmull_d,
    };
    if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}
6691
615f19fe
RH
6692TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
6693TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
e3a56131 6694
615f19fe
RH
6695static gen_helper_gvec_3 * const saddw_fns[4] = {
6696 NULL, gen_helper_sve2_saddw_h,
6697 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
6698};
6699TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
6700TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
e3a56131 6701
615f19fe
RH
6702static gen_helper_gvec_3 * const ssubw_fns[4] = {
6703 NULL, gen_helper_sve2_ssubw_h,
6704 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
6705};
6706TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
6707TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
81fccf09 6708
615f19fe
RH
6709static gen_helper_gvec_3 * const uaddw_fns[4] = {
6710 NULL, gen_helper_sve2_uaddw_h,
6711 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
6712};
6713TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
6714TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
81fccf09 6715
615f19fe
RH
6716static gen_helper_gvec_3 * const usubw_fns[4] = {
6717 NULL, gen_helper_sve2_usubw_h,
6718 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
6719};
6720TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
6721TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
4269fef1
RH
6722
/*
 * SSHLL expansion, vector form: widen the bottom (top = 0) or top
 * (top = 1) half-width elements of N with sign extension, then shift
 * left by SHL.  IMM packs the shift amount in its upper bits with the
 * top/bottom selector in bit 0.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            /*
             * The top half is already in its final position; the sign
             * extension is shifted out entirely, so just mask the
             * bottom half away.
             */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Sign-extend the top half down, then shift into place. */
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        /* Move the bottom half up, then arithmetic-shift back down. */
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}
6744
/*
 * USHLL expansion on a 64-bit scalar: widen the bottom or top
 * half-width elements of N with zero extension, then shift left.
 * IMM packs the shift amount in its upper bits with the top/bottom
 * selector in bit 0.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;      /* width of the narrow source element */
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    /* Mask of the widened result lanes, positioned post-shift. */
    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    /*
     * Bottom halves shift left by shl; top halves start halfbits
     * higher, so the net shift may be negative, i.e. a right shift.
     */
    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}
6765
/* Per-vece specializations of gen_ushll_i64, for use as .fni8 callbacks. */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
6780
/*
 * USHLL expansion, vector form: widen the bottom (top = 0) or top
 * (top = 1) half-width elements of N with zero extension, then shift
 * left by SHL.  IMM packs the shift amount with the selector in bit 0.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            /* The top half shifts exactly into place; just mask. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Zero-extend the top half down, then shift into place. */
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            /* No shift: simply mask out the top half. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Shift the bottom half up, then logical-shift back down. */
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
6809
/*
 * Common expansion for SSHLLB/SSHLLT/USHLLB/USHLLT.
 * SEL chooses bottom (false) or top (true) half-elements; UNS chooses
 * unsigned (true) or signed (false) widening.  The immediate handed to
 * the GVecGen2i expanders packs the shift amount with SEL in bit 0,
 * matching gen_[su]shll_vec/gen_ushll_i64 above.
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    /* Indexed by [uns][esz]; vece is the widened element size. */
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}
6861
/* SSHLLB: signed shift-left-long of the bottom half-elements. */
static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}
6866
/* SSHLLT: signed shift-left-long of the top half-elements. */
static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}
6871
/* USHLLB: unsigned shift-left-long of the bottom half-elements. */
static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}
6876
/* USHLLT: unsigned shift-left-long of the top half-elements. */
static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
cb9c33b8 6881
615f19fe
RH
6882static gen_helper_gvec_3 * const bext_fns[4] = {
6883 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
6884 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
6885};
6886TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6887 bext_fns[a->esz], a, 0)
ed4a6387 6888
615f19fe
RH
6889static gen_helper_gvec_3 * const bdep_fns[4] = {
6890 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
6891 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
6892};
6893TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6894 bdep_fns[a->esz], a, 0)
ed4a6387 6895
615f19fe
RH
6896static gen_helper_gvec_3 * const bgrp_fns[4] = {
6897 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
6898 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
6899};
6900TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6901 bgrp_fns[a->esz], a, 0)
ed4a6387 6902
615f19fe
RH
6903static gen_helper_gvec_3 * const cadd_fns[4] = {
6904 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
6905 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
6906};
6907TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6908 cadd_fns[a->esz], a, 0)
6909TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6910 cadd_fns[a->esz], a, 1)
6911
6912static gen_helper_gvec_3 * const sqcadd_fns[4] = {
6913 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
6914 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
6915};
6916TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6917 sqcadd_fns[a->esz], a, 0)
6918TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6919 sqcadd_fns[a->esz], a, 1)
38650638 6920
eeb4e84d
RH
6921static gen_helper_gvec_4 * const sabal_fns[4] = {
6922 NULL, gen_helper_sve2_sabal_h,
6923 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
6924};
6925TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
6926TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
38650638 6927
eeb4e84d
RH
6928static gen_helper_gvec_4 * const uabal_fns[4] = {
6929 NULL, gen_helper_sve2_uabal_h,
6930 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
6931};
6932TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
6933TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
b8295dfb
RH
6934
/* ADCLB/ADCLT: add-with-carry long, bottom/top. */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign:
     * bit 0 selects 32-bit vs 64-bit elements, while bit 1 (the decode's
     * sign bit) requests subtract.  Split out 'subtract' into bit 1 of
     * the data field for the helper, alongside SEL (top/bottom) in bit 0.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}
6947
eeb4e84d
RH
6948TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
6949TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e
RH
6950
/*
 * Expand an SVE2 two-operand-plus-immediate operation via a GVecGen2i
 * expander.  Requires SVE2 and a valid (non-negative) element size.
 */
static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
        unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
        fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
    }
    return true;
}
6964
/* SSRA: signed shift right and accumulate. */
static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ssra);
}
6969
/* USRA: unsigned shift right and accumulate. */
static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_usra);
}
6974
/* SRSRA: signed rounding shift right and accumulate. */
static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_srsra);
}
6979
/* URSRA: unsigned rounding shift right and accumulate. */
static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ursra);
}
fc12b46a
RH
6984
6985static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
6986{
6987 return do_sve2_fn2i(s, a, gen_gvec_sri);
6988}
6989
6990static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
6991{
6992 return do_sve2_fn2i(s, a, gen_gvec_sli);
6993}
289a1797
RH
6994
6995static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
6996{
6997 if (!dc_isar_feature(aa64_sve2, s)) {
6998 return false;
6999 }
7000 if (sve_access_check(s)) {
7001 gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
7002 }
7003 return true;
7004}
7005
7006static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
7007{
7008 return do_sve2_fn_zzz(s, a, gen_gvec_saba);
7009}
7010
7011static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
7012{
7013 return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
7014}
5ff2838d
RH
7015
7016static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
7017 const GVecGen2 ops[3])
7018{
7019 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
7020 !dc_isar_feature(aa64_sve2, s)) {
7021 return false;
7022 }
7023 if (sve_access_check(s)) {
7024 unsigned vsz = vec_full_reg_size(s);
7025 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
7026 vec_full_reg_offset(s, a->rn),
7027 vsz, vsz, &ops[a->esz]);
7028 }
7029 return true;
7030}
7031
7032static const TCGOpcode sqxtn_list[] = {
7033 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
7034};
7035
7036static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7037{
7038 TCGv_vec t = tcg_temp_new_vec_matching(d);
7039 int halfbits = 4 << vece;
7040 int64_t mask = (1ull << halfbits) - 1;
7041 int64_t min = -1ull << (halfbits - 1);
7042 int64_t max = -min - 1;
7043
7044 tcg_gen_dupi_vec(vece, t, min);
7045 tcg_gen_smax_vec(vece, d, n, t);
7046 tcg_gen_dupi_vec(vece, t, max);
7047 tcg_gen_smin_vec(vece, d, d, t);
7048 tcg_gen_dupi_vec(vece, t, mask);
7049 tcg_gen_and_vec(vece, d, d, t);
7050 tcg_temp_free_vec(t);
7051}
7052
7053static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
7054{
7055 static const GVecGen2 ops[3] = {
7056 { .fniv = gen_sqxtnb_vec,
7057 .opt_opc = sqxtn_list,
7058 .fno = gen_helper_sve2_sqxtnb_h,
7059 .vece = MO_16 },
7060 { .fniv = gen_sqxtnb_vec,
7061 .opt_opc = sqxtn_list,
7062 .fno = gen_helper_sve2_sqxtnb_s,
7063 .vece = MO_32 },
7064 { .fniv = gen_sqxtnb_vec,
7065 .opt_opc = sqxtn_list,
7066 .fno = gen_helper_sve2_sqxtnb_d,
7067 .vece = MO_64 },
7068 };
7069 return do_sve2_narrow_extract(s, a, ops);
7070}
7071
7072static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7073{
7074 TCGv_vec t = tcg_temp_new_vec_matching(d);
7075 int halfbits = 4 << vece;
7076 int64_t mask = (1ull << halfbits) - 1;
7077 int64_t min = -1ull << (halfbits - 1);
7078 int64_t max = -min - 1;
7079
7080 tcg_gen_dupi_vec(vece, t, min);
7081 tcg_gen_smax_vec(vece, n, n, t);
7082 tcg_gen_dupi_vec(vece, t, max);
7083 tcg_gen_smin_vec(vece, n, n, t);
7084 tcg_gen_shli_vec(vece, n, n, halfbits);
7085 tcg_gen_dupi_vec(vece, t, mask);
7086 tcg_gen_bitsel_vec(vece, d, t, d, n);
7087 tcg_temp_free_vec(t);
7088}
7089
7090static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
7091{
7092 static const GVecGen2 ops[3] = {
7093 { .fniv = gen_sqxtnt_vec,
7094 .opt_opc = sqxtn_list,
7095 .load_dest = true,
7096 .fno = gen_helper_sve2_sqxtnt_h,
7097 .vece = MO_16 },
7098 { .fniv = gen_sqxtnt_vec,
7099 .opt_opc = sqxtn_list,
7100 .load_dest = true,
7101 .fno = gen_helper_sve2_sqxtnt_s,
7102 .vece = MO_32 },
7103 { .fniv = gen_sqxtnt_vec,
7104 .opt_opc = sqxtn_list,
7105 .load_dest = true,
7106 .fno = gen_helper_sve2_sqxtnt_d,
7107 .vece = MO_64 },
7108 };
7109 return do_sve2_narrow_extract(s, a, ops);
7110}
7111
7112static const TCGOpcode uqxtn_list[] = {
7113 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
7114};
7115
7116static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7117{
7118 TCGv_vec t = tcg_temp_new_vec_matching(d);
7119 int halfbits = 4 << vece;
7120 int64_t max = (1ull << halfbits) - 1;
7121
7122 tcg_gen_dupi_vec(vece, t, max);
7123 tcg_gen_umin_vec(vece, d, n, t);
7124 tcg_temp_free_vec(t);
7125}
7126
7127static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
7128{
7129 static const GVecGen2 ops[3] = {
7130 { .fniv = gen_uqxtnb_vec,
7131 .opt_opc = uqxtn_list,
7132 .fno = gen_helper_sve2_uqxtnb_h,
7133 .vece = MO_16 },
7134 { .fniv = gen_uqxtnb_vec,
7135 .opt_opc = uqxtn_list,
7136 .fno = gen_helper_sve2_uqxtnb_s,
7137 .vece = MO_32 },
7138 { .fniv = gen_uqxtnb_vec,
7139 .opt_opc = uqxtn_list,
7140 .fno = gen_helper_sve2_uqxtnb_d,
7141 .vece = MO_64 },
7142 };
7143 return do_sve2_narrow_extract(s, a, ops);
7144}
7145
7146static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7147{
7148 TCGv_vec t = tcg_temp_new_vec_matching(d);
7149 int halfbits = 4 << vece;
7150 int64_t max = (1ull << halfbits) - 1;
7151
7152 tcg_gen_dupi_vec(vece, t, max);
7153 tcg_gen_umin_vec(vece, n, n, t);
7154 tcg_gen_shli_vec(vece, n, n, halfbits);
7155 tcg_gen_bitsel_vec(vece, d, t, d, n);
7156 tcg_temp_free_vec(t);
7157}
7158
7159static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
7160{
7161 static const GVecGen2 ops[3] = {
7162 { .fniv = gen_uqxtnt_vec,
7163 .opt_opc = uqxtn_list,
7164 .load_dest = true,
7165 .fno = gen_helper_sve2_uqxtnt_h,
7166 .vece = MO_16 },
7167 { .fniv = gen_uqxtnt_vec,
7168 .opt_opc = uqxtn_list,
7169 .load_dest = true,
7170 .fno = gen_helper_sve2_uqxtnt_s,
7171 .vece = MO_32 },
7172 { .fniv = gen_uqxtnt_vec,
7173 .opt_opc = uqxtn_list,
7174 .load_dest = true,
7175 .fno = gen_helper_sve2_uqxtnt_d,
7176 .vece = MO_64 },
7177 };
7178 return do_sve2_narrow_extract(s, a, ops);
7179}
7180
7181static const TCGOpcode sqxtun_list[] = {
7182 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
7183};
7184
7185static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7186{
7187 TCGv_vec t = tcg_temp_new_vec_matching(d);
7188 int halfbits = 4 << vece;
7189 int64_t max = (1ull << halfbits) - 1;
7190
7191 tcg_gen_dupi_vec(vece, t, 0);
7192 tcg_gen_smax_vec(vece, d, n, t);
7193 tcg_gen_dupi_vec(vece, t, max);
7194 tcg_gen_umin_vec(vece, d, d, t);
7195 tcg_temp_free_vec(t);
7196}
7197
7198static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
7199{
7200 static const GVecGen2 ops[3] = {
7201 { .fniv = gen_sqxtunb_vec,
7202 .opt_opc = sqxtun_list,
7203 .fno = gen_helper_sve2_sqxtunb_h,
7204 .vece = MO_16 },
7205 { .fniv = gen_sqxtunb_vec,
7206 .opt_opc = sqxtun_list,
7207 .fno = gen_helper_sve2_sqxtunb_s,
7208 .vece = MO_32 },
7209 { .fniv = gen_sqxtunb_vec,
7210 .opt_opc = sqxtun_list,
7211 .fno = gen_helper_sve2_sqxtunb_d,
7212 .vece = MO_64 },
7213 };
7214 return do_sve2_narrow_extract(s, a, ops);
7215}
7216
7217static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7218{
7219 TCGv_vec t = tcg_temp_new_vec_matching(d);
7220 int halfbits = 4 << vece;
7221 int64_t max = (1ull << halfbits) - 1;
7222
7223 tcg_gen_dupi_vec(vece, t, 0);
7224 tcg_gen_smax_vec(vece, n, n, t);
7225 tcg_gen_dupi_vec(vece, t, max);
7226 tcg_gen_umin_vec(vece, n, n, t);
7227 tcg_gen_shli_vec(vece, n, n, halfbits);
7228 tcg_gen_bitsel_vec(vece, d, t, d, n);
7229 tcg_temp_free_vec(t);
7230}
7231
7232static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
7233{
7234 static const GVecGen2 ops[3] = {
7235 { .fniv = gen_sqxtunt_vec,
7236 .opt_opc = sqxtun_list,
7237 .load_dest = true,
7238 .fno = gen_helper_sve2_sqxtunt_h,
7239 .vece = MO_16 },
7240 { .fniv = gen_sqxtunt_vec,
7241 .opt_opc = sqxtun_list,
7242 .load_dest = true,
7243 .fno = gen_helper_sve2_sqxtunt_s,
7244 .vece = MO_32 },
7245 { .fniv = gen_sqxtunt_vec,
7246 .opt_opc = sqxtun_list,
7247 .load_dest = true,
7248 .fno = gen_helper_sve2_sqxtunt_d,
7249 .vece = MO_64 },
7250 };
7251 return do_sve2_narrow_extract(s, a, ops);
46d111b2
RH
7252}
7253
7254static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
7255 const GVecGen2i ops[3])
7256{
7257 if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
7258 return false;
7259 }
7260 assert(a->imm > 0 && a->imm <= (8 << a->esz));
7261 if (sve_access_check(s)) {
7262 unsigned vsz = vec_full_reg_size(s);
7263 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
7264 vec_full_reg_offset(s, a->rn),
7265 vsz, vsz, a->imm, &ops[a->esz]);
7266 }
7267 return true;
7268}
7269
7270static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
7271{
7272 int halfbits = 4 << vece;
7273 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
7274
7275 tcg_gen_shri_i64(d, n, shr);
7276 tcg_gen_andi_i64(d, d, mask);
7277}
7278
7279static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7280{
7281 gen_shrnb_i64(MO_16, d, n, shr);
7282}
7283
7284static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7285{
7286 gen_shrnb_i64(MO_32, d, n, shr);
7287}
7288
7289static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7290{
7291 gen_shrnb_i64(MO_64, d, n, shr);
7292}
7293
7294static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
7295{
7296 TCGv_vec t = tcg_temp_new_vec_matching(d);
7297 int halfbits = 4 << vece;
7298 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
7299
7300 tcg_gen_shri_vec(vece, n, n, shr);
7301 tcg_gen_dupi_vec(vece, t, mask);
7302 tcg_gen_and_vec(vece, d, n, t);
7303 tcg_temp_free_vec(t);
7304}
7305
7306static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
7307{
7308 static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
7309 static const GVecGen2i ops[3] = {
7310 { .fni8 = gen_shrnb16_i64,
7311 .fniv = gen_shrnb_vec,
7312 .opt_opc = vec_list,
7313 .fno = gen_helper_sve2_shrnb_h,
7314 .vece = MO_16 },
7315 { .fni8 = gen_shrnb32_i64,
7316 .fniv = gen_shrnb_vec,
7317 .opt_opc = vec_list,
7318 .fno = gen_helper_sve2_shrnb_s,
7319 .vece = MO_32 },
7320 { .fni8 = gen_shrnb64_i64,
7321 .fniv = gen_shrnb_vec,
7322 .opt_opc = vec_list,
7323 .fno = gen_helper_sve2_shrnb_d,
7324 .vece = MO_64 },
7325 };
7326 return do_sve2_shr_narrow(s, a, ops);
7327}
7328
7329static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
7330{
7331 int halfbits = 4 << vece;
7332 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
7333
7334 tcg_gen_shli_i64(n, n, halfbits - shr);
7335 tcg_gen_andi_i64(n, n, ~mask);
7336 tcg_gen_andi_i64(d, d, mask);
7337 tcg_gen_or_i64(d, d, n);
7338}
7339
7340static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7341{
7342 gen_shrnt_i64(MO_16, d, n, shr);
7343}
7344
7345static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7346{
7347 gen_shrnt_i64(MO_32, d, n, shr);
7348}
7349
7350static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7351{
7352 tcg_gen_shri_i64(n, n, shr);
7353 tcg_gen_deposit_i64(d, d, n, 32, 32);
7354}
7355
7356static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
7357{
7358 TCGv_vec t = tcg_temp_new_vec_matching(d);
7359 int halfbits = 4 << vece;
7360 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
7361
7362 tcg_gen_shli_vec(vece, n, n, halfbits - shr);
7363 tcg_gen_dupi_vec(vece, t, mask);
7364 tcg_gen_bitsel_vec(vece, d, t, d, n);
7365 tcg_temp_free_vec(t);
7366}
7367
7368static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
7369{
7370 static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
7371 static const GVecGen2i ops[3] = {
7372 { .fni8 = gen_shrnt16_i64,
7373 .fniv = gen_shrnt_vec,
7374 .opt_opc = vec_list,
7375 .load_dest = true,
7376 .fno = gen_helper_sve2_shrnt_h,
7377 .vece = MO_16 },
7378 { .fni8 = gen_shrnt32_i64,
7379 .fniv = gen_shrnt_vec,
7380 .opt_opc = vec_list,
7381 .load_dest = true,
7382 .fno = gen_helper_sve2_shrnt_s,
7383 .vece = MO_32 },
7384 { .fni8 = gen_shrnt64_i64,
7385 .fniv = gen_shrnt_vec,
7386 .opt_opc = vec_list,
7387 .load_dest = true,
7388 .fno = gen_helper_sve2_shrnt_d,
7389 .vece = MO_64 },
7390 };
7391 return do_sve2_shr_narrow(s, a, ops);
7392}
7393
7394static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
7395{
7396 static const GVecGen2i ops[3] = {
7397 { .fno = gen_helper_sve2_rshrnb_h },
7398 { .fno = gen_helper_sve2_rshrnb_s },
7399 { .fno = gen_helper_sve2_rshrnb_d },
7400 };
7401 return do_sve2_shr_narrow(s, a, ops);
7402}
7403
7404static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
7405{
7406 static const GVecGen2i ops[3] = {
7407 { .fno = gen_helper_sve2_rshrnt_h },
7408 { .fno = gen_helper_sve2_rshrnt_s },
7409 { .fno = gen_helper_sve2_rshrnt_d },
7410 };
7411 return do_sve2_shr_narrow(s, a, ops);
81fd3e6e
RH
7412}
7413
7414static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
7415 TCGv_vec n, int64_t shr)
7416{
7417 TCGv_vec t = tcg_temp_new_vec_matching(d);
7418 int halfbits = 4 << vece;
7419
7420 tcg_gen_sari_vec(vece, n, n, shr);
7421 tcg_gen_dupi_vec(vece, t, 0);
7422 tcg_gen_smax_vec(vece, n, n, t);
7423 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7424 tcg_gen_umin_vec(vece, d, n, t);
7425 tcg_temp_free_vec(t);
7426}
7427
7428static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
7429{
7430 static const TCGOpcode vec_list[] = {
7431 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7432 };
7433 static const GVecGen2i ops[3] = {
7434 { .fniv = gen_sqshrunb_vec,
7435 .opt_opc = vec_list,
7436 .fno = gen_helper_sve2_sqshrunb_h,
7437 .vece = MO_16 },
7438 { .fniv = gen_sqshrunb_vec,
7439 .opt_opc = vec_list,
7440 .fno = gen_helper_sve2_sqshrunb_s,
7441 .vece = MO_32 },
7442 { .fniv = gen_sqshrunb_vec,
7443 .opt_opc = vec_list,
7444 .fno = gen_helper_sve2_sqshrunb_d,
7445 .vece = MO_64 },
7446 };
7447 return do_sve2_shr_narrow(s, a, ops);
7448}
7449
7450static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
7451 TCGv_vec n, int64_t shr)
7452{
7453 TCGv_vec t = tcg_temp_new_vec_matching(d);
7454 int halfbits = 4 << vece;
7455
7456 tcg_gen_sari_vec(vece, n, n, shr);
7457 tcg_gen_dupi_vec(vece, t, 0);
7458 tcg_gen_smax_vec(vece, n, n, t);
7459 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7460 tcg_gen_umin_vec(vece, n, n, t);
7461 tcg_gen_shli_vec(vece, n, n, halfbits);
7462 tcg_gen_bitsel_vec(vece, d, t, d, n);
7463 tcg_temp_free_vec(t);
7464}
7465
7466static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
7467{
7468 static const TCGOpcode vec_list[] = {
7469 INDEX_op_shli_vec, INDEX_op_sari_vec,
7470 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7471 };
7472 static const GVecGen2i ops[3] = {
7473 { .fniv = gen_sqshrunt_vec,
7474 .opt_opc = vec_list,
7475 .load_dest = true,
7476 .fno = gen_helper_sve2_sqshrunt_h,
7477 .vece = MO_16 },
7478 { .fniv = gen_sqshrunt_vec,
7479 .opt_opc = vec_list,
7480 .load_dest = true,
7481 .fno = gen_helper_sve2_sqshrunt_s,
7482 .vece = MO_32 },
7483 { .fniv = gen_sqshrunt_vec,
7484 .opt_opc = vec_list,
7485 .load_dest = true,
7486 .fno = gen_helper_sve2_sqshrunt_d,
7487 .vece = MO_64 },
7488 };
7489 return do_sve2_shr_narrow(s, a, ops);
7490}
7491
7492static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
7493{
7494 static const GVecGen2i ops[3] = {
7495 { .fno = gen_helper_sve2_sqrshrunb_h },
7496 { .fno = gen_helper_sve2_sqrshrunb_s },
7497 { .fno = gen_helper_sve2_sqrshrunb_d },
7498 };
7499 return do_sve2_shr_narrow(s, a, ops);
7500}
7501
7502static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
7503{
7504 static const GVecGen2i ops[3] = {
7505 { .fno = gen_helper_sve2_sqrshrunt_h },
7506 { .fno = gen_helper_sve2_sqrshrunt_s },
7507 { .fno = gen_helper_sve2_sqrshrunt_d },
7508 };
7509 return do_sve2_shr_narrow(s, a, ops);
c13418da
RH
7510}
7511
743bb147
RH
7512static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
7513 TCGv_vec n, int64_t shr)
7514{
7515 TCGv_vec t = tcg_temp_new_vec_matching(d);
7516 int halfbits = 4 << vece;
7517 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7518 int64_t min = -max - 1;
7519
7520 tcg_gen_sari_vec(vece, n, n, shr);
7521 tcg_gen_dupi_vec(vece, t, min);
7522 tcg_gen_smax_vec(vece, n, n, t);
7523 tcg_gen_dupi_vec(vece, t, max);
7524 tcg_gen_smin_vec(vece, n, n, t);
7525 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7526 tcg_gen_and_vec(vece, d, n, t);
7527 tcg_temp_free_vec(t);
7528}
7529
7530static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
7531{
7532 static const TCGOpcode vec_list[] = {
7533 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7534 };
7535 static const GVecGen2i ops[3] = {
7536 { .fniv = gen_sqshrnb_vec,
7537 .opt_opc = vec_list,
7538 .fno = gen_helper_sve2_sqshrnb_h,
7539 .vece = MO_16 },
7540 { .fniv = gen_sqshrnb_vec,
7541 .opt_opc = vec_list,
7542 .fno = gen_helper_sve2_sqshrnb_s,
7543 .vece = MO_32 },
7544 { .fniv = gen_sqshrnb_vec,
7545 .opt_opc = vec_list,
7546 .fno = gen_helper_sve2_sqshrnb_d,
7547 .vece = MO_64 },
7548 };
7549 return do_sve2_shr_narrow(s, a, ops);
7550}
7551
7552static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
7553 TCGv_vec n, int64_t shr)
7554{
7555 TCGv_vec t = tcg_temp_new_vec_matching(d);
7556 int halfbits = 4 << vece;
7557 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7558 int64_t min = -max - 1;
7559
7560 tcg_gen_sari_vec(vece, n, n, shr);
7561 tcg_gen_dupi_vec(vece, t, min);
7562 tcg_gen_smax_vec(vece, n, n, t);
7563 tcg_gen_dupi_vec(vece, t, max);
7564 tcg_gen_smin_vec(vece, n, n, t);
7565 tcg_gen_shli_vec(vece, n, n, halfbits);
7566 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7567 tcg_gen_bitsel_vec(vece, d, t, d, n);
7568 tcg_temp_free_vec(t);
7569}
7570
7571static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
7572{
7573 static const TCGOpcode vec_list[] = {
7574 INDEX_op_shli_vec, INDEX_op_sari_vec,
7575 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7576 };
7577 static const GVecGen2i ops[3] = {
7578 { .fniv = gen_sqshrnt_vec,
7579 .opt_opc = vec_list,
7580 .load_dest = true,
7581 .fno = gen_helper_sve2_sqshrnt_h,
7582 .vece = MO_16 },
7583 { .fniv = gen_sqshrnt_vec,
7584 .opt_opc = vec_list,
7585 .load_dest = true,
7586 .fno = gen_helper_sve2_sqshrnt_s,
7587 .vece = MO_32 },
7588 { .fniv = gen_sqshrnt_vec,
7589 .opt_opc = vec_list,
7590 .load_dest = true,
7591 .fno = gen_helper_sve2_sqshrnt_d,
7592 .vece = MO_64 },
7593 };
7594 return do_sve2_shr_narrow(s, a, ops);
7595}
7596
7597static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
7598{
7599 static const GVecGen2i ops[3] = {
7600 { .fno = gen_helper_sve2_sqrshrnb_h },
7601 { .fno = gen_helper_sve2_sqrshrnb_s },
7602 { .fno = gen_helper_sve2_sqrshrnb_d },
7603 };
7604 return do_sve2_shr_narrow(s, a, ops);
7605}
7606
7607static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
7608{
7609 static const GVecGen2i ops[3] = {
7610 { .fno = gen_helper_sve2_sqrshrnt_h },
7611 { .fno = gen_helper_sve2_sqrshrnt_s },
7612 { .fno = gen_helper_sve2_sqrshrnt_d },
7613 };
7614 return do_sve2_shr_narrow(s, a, ops);
7615}
7616
c13418da
RH
7617static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
7618 TCGv_vec n, int64_t shr)
7619{
7620 TCGv_vec t = tcg_temp_new_vec_matching(d);
7621 int halfbits = 4 << vece;
7622
7623 tcg_gen_shri_vec(vece, n, n, shr);
7624 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7625 tcg_gen_umin_vec(vece, d, n, t);
7626 tcg_temp_free_vec(t);
7627}
7628
7629static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
7630{
7631 static const TCGOpcode vec_list[] = {
7632 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7633 };
7634 static const GVecGen2i ops[3] = {
7635 { .fniv = gen_uqshrnb_vec,
7636 .opt_opc = vec_list,
7637 .fno = gen_helper_sve2_uqshrnb_h,
7638 .vece = MO_16 },
7639 { .fniv = gen_uqshrnb_vec,
7640 .opt_opc = vec_list,
7641 .fno = gen_helper_sve2_uqshrnb_s,
7642 .vece = MO_32 },
7643 { .fniv = gen_uqshrnb_vec,
7644 .opt_opc = vec_list,
7645 .fno = gen_helper_sve2_uqshrnb_d,
7646 .vece = MO_64 },
7647 };
7648 return do_sve2_shr_narrow(s, a, ops);
7649}
7650
7651static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
7652 TCGv_vec n, int64_t shr)
7653{
7654 TCGv_vec t = tcg_temp_new_vec_matching(d);
7655 int halfbits = 4 << vece;
7656
7657 tcg_gen_shri_vec(vece, n, n, shr);
7658 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7659 tcg_gen_umin_vec(vece, n, n, t);
7660 tcg_gen_shli_vec(vece, n, n, halfbits);
7661 tcg_gen_bitsel_vec(vece, d, t, d, n);
7662 tcg_temp_free_vec(t);
7663}
7664
7665static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
7666{
7667 static const TCGOpcode vec_list[] = {
7668 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7669 };
7670 static const GVecGen2i ops[3] = {
7671 { .fniv = gen_uqshrnt_vec,
7672 .opt_opc = vec_list,
7673 .load_dest = true,
7674 .fno = gen_helper_sve2_uqshrnt_h,
7675 .vece = MO_16 },
7676 { .fniv = gen_uqshrnt_vec,
7677 .opt_opc = vec_list,
7678 .load_dest = true,
7679 .fno = gen_helper_sve2_uqshrnt_s,
7680 .vece = MO_32 },
7681 { .fniv = gen_uqshrnt_vec,
7682 .opt_opc = vec_list,
7683 .load_dest = true,
7684 .fno = gen_helper_sve2_uqshrnt_d,
7685 .vece = MO_64 },
7686 };
7687 return do_sve2_shr_narrow(s, a, ops);
7688}
7689
7690static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
7691{
7692 static const GVecGen2i ops[3] = {
7693 { .fno = gen_helper_sve2_uqrshrnb_h },
7694 { .fno = gen_helper_sve2_uqrshrnb_s },
7695 { .fno = gen_helper_sve2_uqrshrnb_d },
7696 };
7697 return do_sve2_shr_narrow(s, a, ops);
7698}
7699
7700static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
7701{
7702 static const GVecGen2i ops[3] = {
7703 { .fno = gen_helper_sve2_uqrshrnt_h },
7704 { .fno = gen_helper_sve2_uqrshrnt_s },
7705 { .fno = gen_helper_sve2_uqrshrnt_d },
7706 };
7707 return do_sve2_shr_narrow(s, a, ops);
5ff2838d 7708}
b87dbeeb 7709
40d5ea50 7710#define DO_SVE2_ZZZ_NARROW(NAME, name) \
bd394cf5 7711 static gen_helper_gvec_3 * const name##_fns[4] = { \
40d5ea50
SL
7712 NULL, gen_helper_sve2_##name##_h, \
7713 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
7714 }; \
bd394cf5
RH
7715 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
7716 name##_fns[a->esz], a, 0)
40d5ea50
SL
7717
7718DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
7719DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
0ea3ff02
SL
7720DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
7721DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
40d5ea50 7722
c3cd6766
SL
7723DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
7724DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
e9443d10
SL
7725DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
7726DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
c3cd6766 7727
e0ae6ec3
SL
7728static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
7729 gen_helper_gvec_flags_4 *fn)
7730{
7731 if (!dc_isar_feature(aa64_sve2, s)) {
7732 return false;
7733 }
7734 return do_ppzz_flags(s, a, fn);
7735}
7736
7737#define DO_SVE2_PPZZ_MATCH(NAME, name) \
7738static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
7739{ \
7740 static gen_helper_gvec_flags_4 * const fns[4] = { \
7741 gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h, \
7742 NULL, NULL \
7743 }; \
7744 return do_sve2_ppzz_flags(s, a, fns[a->esz]); \
7745}
7746
7747DO_SVE2_PPZZ_MATCH(MATCH, match)
7748DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
7749
7d47ac94
SL
7750static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
7751{
7752 static gen_helper_gvec_4 * const fns[2] = {
7753 gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
7754 };
7755 if (a->esz < 2) {
7756 return false;
7757 }
7758 return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
7759}
7760
bd394cf5
RH
7761TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
7762 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7d47ac94 7763
b87dbeeb
SL
7764static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
7765 gen_helper_gvec_4_ptr *fn)
7766{
7767 if (!dc_isar_feature(aa64_sve2, s)) {
7768 return false;
7769 }
7770 return do_zpzz_fp(s, a, fn);
7771}
7772
7773#define DO_SVE2_ZPZZ_FP(NAME, name) \
7774static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
7775{ \
7776 static gen_helper_gvec_4_ptr * const fns[4] = { \
7777 NULL, gen_helper_sve2_##name##_zpzz_h, \
7778 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d \
7779 }; \
7780 return do_sve2_zpzz_fp(s, a, fns[a->esz]); \
7781}
7782
7783DO_SVE2_ZPZZ_FP(FADDP, faddp)
7784DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
7785DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
7786DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
7787DO_SVE2_ZPZZ_FP(FMINP, fminp)
bfc9307e
RH
7788
7789/*
7790 * SVE Integer Multiply-Add (unpredicated)
7791 */
7792
4f26756b
SL
7793static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
7794{
7795 gen_helper_gvec_4_ptr *fn;
7796
7797 switch (a->esz) {
7798 case MO_32:
7799 if (!dc_isar_feature(aa64_sve_f32mm, s)) {
7800 return false;
7801 }
7802 fn = gen_helper_fmmla_s;
7803 break;
7804 case MO_64:
7805 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
7806 return false;
7807 }
7808 fn = gen_helper_fmmla_d;
7809 break;
7810 default:
7811 return false;
7812 }
7813
7814 if (sve_access_check(s)) {
7815 unsigned vsz = vec_full_reg_size(s);
7816 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
7817 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
7818 vec_full_reg_offset(s, a->rn),
7819 vec_full_reg_offset(s, a->rm),
7820 vec_full_reg_offset(s, a->ra),
7821 status, vsz, vsz, 0, fn);
7822 tcg_temp_free_ptr(status);
7823 }
7824 return true;
7825}
7826
eeb4e84d
RH
7827static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
7828 NULL, gen_helper_sve2_sqdmlal_zzzw_h,
7829 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
7830};
7831TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7832 sqdmlal_zzzw_fns[a->esz], a, 0)
7833TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7834 sqdmlal_zzzw_fns[a->esz], a, 3)
7835TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7836 sqdmlal_zzzw_fns[a->esz], a, 2)
7837
7838static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
7839 NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
7840 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
7841};
7842TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7843 sqdmlsl_zzzw_fns[a->esz], a, 0)
7844TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7845 sqdmlsl_zzzw_fns[a->esz], a, 3)
7846TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7847 sqdmlsl_zzzw_fns[a->esz], a, 2)
7848
7849static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
7850 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
7851 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
7852};
7853TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7854 sqrdmlah_fns[a->esz], a, 0)
45a32e80 7855
eeb4e84d
RH
7856static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
7857 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
7858 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
7859};
7860TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7861 sqrdmlsh_fns[a->esz], a, 0)
45a32e80 7862
eeb4e84d
RH
7863static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
7864 NULL, gen_helper_sve2_smlal_zzzw_h,
7865 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
7866};
7867TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7868 smlal_zzzw_fns[a->esz], a, 0)
7869TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7870 smlal_zzzw_fns[a->esz], a, 1)
7871
7872static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
7873 NULL, gen_helper_sve2_umlal_zzzw_h,
7874 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
7875};
7876TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7877 umlal_zzzw_fns[a->esz], a, 0)
7878TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7879 umlal_zzzw_fns[a->esz], a, 1)
7880
7881static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
7882 NULL, gen_helper_sve2_smlsl_zzzw_h,
7883 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
7884};
7885TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7886 smlsl_zzzw_fns[a->esz], a, 0)
7887TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7888 smlsl_zzzw_fns[a->esz], a, 1)
7889
7890static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
7891 NULL, gen_helper_sve2_umlsl_zzzw_h,
7892 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
7893};
7894TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7895 umlsl_zzzw_fns[a->esz], a, 0)
7896TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7897 umlsl_zzzw_fns[a->esz], a, 1)
d782d3ca 7898
5f425b92
RH
7899static gen_helper_gvec_4 * const cmla_fns[] = {
7900 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
7901 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
7902};
7903TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7904 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
21068f39 7905
5f425b92
RH
7906static gen_helper_gvec_4 * const cdot_fns[] = {
7907 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
7908};
7909TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7910 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
d782d3ca 7911
5f425b92
RH
7912static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
7913 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
7914 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
7915};
7916TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7917 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
6a98cb2a 7918
8740d694
RH
7919TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7920 a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
b2bcd1be 7921
0ea3cdbf
RH
7922TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
7923 gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
3cc7a88e 7924
32e2ad65
RH
7925TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7926 gen_helper_crypto_aese, a, false)
7927TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7928 gen_helper_crypto_aese, a, true)
3cc7a88e 7929
32e2ad65
RH
7930TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7931 gen_helper_crypto_sm4e, a, 0)
7932TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7933 gen_helper_crypto_sm4ekey, a, 0)
3358eb3f
RH
7934
7935static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
7936{
7937 if (!dc_isar_feature(aa64_sve2_sha3, s)) {
7938 return false;
7939 }
7940 if (sve_access_check(s)) {
7941 gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
7942 }
7943 return true;
7944}
5c1b7226
RH
7945
7946static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
7947{
7948 if (!dc_isar_feature(aa64_sve2, s)) {
7949 return false;
7950 }
7951 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
7952}
7953
d29b17ca
RH
7954static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
7955{
7956 if (!dc_isar_feature(aa64_sve_bf16, s)) {
7957 return false;
7958 }
7959 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
7960}
7961
5c1b7226
RH
7962static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
7963{
7964 if (!dc_isar_feature(aa64_sve2, s)) {
7965 return false;
7966 }
7967 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
7968}
83c2523f
SL
7969
7970static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
7971{
7972 if (!dc_isar_feature(aa64_sve2, s)) {
7973 return false;
7974 }
7975 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
7976}
7977
7978static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
7979{
7980 if (!dc_isar_feature(aa64_sve2, s)) {
7981 return false;
7982 }
7983 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
7984}
95365277
SL
7985
7986static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
7987{
7988 if (!dc_isar_feature(aa64_sve2, s)) {
7989 return false;
7990 }
7991 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
7992}
7993
7994static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
7995{
7996 if (!dc_isar_feature(aa64_sve2, s)) {
7997 return false;
7998 }
7999 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
8000}
631be02e
SL
8001
8002static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
8003{
8004 static gen_helper_gvec_3_ptr * const fns[] = {
8005 NULL, gen_helper_flogb_h,
8006 gen_helper_flogb_s, gen_helper_flogb_d
8007 };
8008
8009 if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
8010 return false;
8011 }
8012 if (sve_access_check(s)) {
8013 TCGv_ptr status =
8014 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8015 unsigned vsz = vec_full_reg_size(s);
8016
8017 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
8018 vec_full_reg_offset(s, a->rn),
8019 pred_full_reg_offset(s, a->pg),
8020 status, vsz, vsz, 0, fns[a->esz]);
8021 tcg_temp_free_ptr(status);
8022 }
8023 return true;
8024}
50d102bd
SL
8025
8026static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
8027{
8028 if (!dc_isar_feature(aa64_sve2, s)) {
8029 return false;
8030 }
8031 if (sve_access_check(s)) {
8032 unsigned vsz = vec_full_reg_size(s);
8033 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8034 vec_full_reg_offset(s, a->rn),
8035 vec_full_reg_offset(s, a->rm),
8036 vec_full_reg_offset(s, a->ra),
8037 cpu_env, vsz, vsz, (sel << 1) | sub,
8038 gen_helper_sve2_fmlal_zzzw_s);
8039 }
8040 return true;
8041}
8042
8043static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8044{
8045 return do_FMLAL_zzzw(s, a, false, false);
8046}
8047
8048static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8049{
8050 return do_FMLAL_zzzw(s, a, false, true);
8051}
8052
8053static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8054{
8055 return do_FMLAL_zzzw(s, a, true, false);
8056}
8057
8058static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8059{
8060 return do_FMLAL_zzzw(s, a, true, true);
8061}
8062
8063static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
8064{
8065 if (!dc_isar_feature(aa64_sve2, s)) {
8066 return false;
8067 }
8068 if (sve_access_check(s)) {
8069 unsigned vsz = vec_full_reg_size(s);
8070 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8071 vec_full_reg_offset(s, a->rn),
8072 vec_full_reg_offset(s, a->rm),
8073 vec_full_reg_offset(s, a->ra),
8074 cpu_env, vsz, vsz,
8075 (a->index << 2) | (sel << 1) | sub,
8076 gen_helper_sve2_fmlal_zzxw_s);
8077 }
8078 return true;
8079}
8080
8081static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8082{
8083 return do_FMLAL_zzxw(s, a, false, false);
8084}
8085
8086static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8087{
8088 return do_FMLAL_zzxw(s, a, false, true);
8089}
8090
8091static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8092{
8093 return do_FMLAL_zzxw(s, a, true, false);
8094}
8095
8096static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8097{
8098 return do_FMLAL_zzxw(s, a, true, true);
8099}
/* Integer matrix multiply-accumulate, gated on FEAT_I8MM. */
TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_ummla_b, a, 0)

/* BFloat16 dot product, vector and indexed forms (FEAT_BF16). */
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

/* BFloat16 matrix multiply-accumulate (FEAT_BF16). */
TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfmmla, a, 0)
5693887f
RH
8115
8116static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8117{
8118 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8119 return false;
8120 }
8121 if (sve_access_check(s)) {
8122 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8123 unsigned vsz = vec_full_reg_size(s);
8124
8125 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8126 vec_full_reg_offset(s, a->rn),
8127 vec_full_reg_offset(s, a->rm),
8128 vec_full_reg_offset(s, a->ra),
8129 status, vsz, vsz, sel,
8130 gen_helper_gvec_bfmlal);
8131 tcg_temp_free_ptr(status);
8132 }
8133 return true;
8134}
8135
8136static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8137{
8138 return do_BFMLAL_zzzw(s, a, false);
8139}
8140
8141static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8142{
8143 return do_BFMLAL_zzzw(s, a, true);
8144}
458d0ab6
RH
8145
8146static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
8147{
8148 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8149 return false;
8150 }
8151 if (sve_access_check(s)) {
8152 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8153 unsigned vsz = vec_full_reg_size(s);
8154
8155 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8156 vec_full_reg_offset(s, a->rn),
8157 vec_full_reg_offset(s, a->rm),
8158 vec_full_reg_offset(s, a->ra),
8159 status, vsz, vsz, (a->index << 1) | sel,
8160 gen_helper_gvec_bfmlal_idx);
8161 tcg_temp_free_ptr(status);
8162 }
8163 return true;
8164}
8165
8166static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8167{
8168 return do_BFMLAL_zzxw(s, a, false);
8169}
8170
8171static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8172{
8173 return do_BFMLAL_zzxw(s, a, true);
8174}