]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Use TRANS_FEAT for FMMLA
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
49/*
50 * Helpers for extracting complex instruction fields.
51 */
52
53/* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
451e4ffd 56static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
57{
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60}
61
451e4ffd 62static int tszimm_shr(DisasContext *s, int x)
ccd841c3 63{
451e4ffd 64 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
65}
66
67/* See e.g. LSL (immediate, predicated). */
451e4ffd 68static int tszimm_shl(DisasContext *s, int x)
ccd841c3 69{
451e4ffd 70 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
71}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
c4e7c493
RH
84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
86 */
451e4ffd 87static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
88{
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
91}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
103/* Return the offset info CPUARMState of the predicate vector register Pn.
104 * Note for this purpose, FFR is P16.
105 */
106static inline int pred_full_reg_offset(DisasContext *s, int regno)
107{
108 return offsetof(CPUARMState, vfp.pregs[regno]);
109}
110
111/* Return the byte size of the whole predicate register, VL / 64. */
112static inline int pred_full_reg_size(DisasContext *s)
113{
114 return s->sve_len >> 3;
115}
116
516e246a
RH
117/* Round up the size of a register to a size allowed by
118 * the tcg vector infrastructure. Any operation which uses this
119 * size may assume that the bits above pred_full_reg_size are zero,
120 * and must leave them the same way.
121 *
122 * Note that this is not needed for the vector registers as they
123 * are always properly sized for tcg vectors.
124 */
125static int size_for_gvec(int size)
126{
127 if (size <= 8) {
128 return 8;
129 } else {
130 return QEMU_ALIGN_UP(size, 16);
131 }
132}
133
134static int pred_gvec_reg_size(DisasContext *s)
135{
136 return size_for_gvec(pred_full_reg_size(s));
137}
138
40e32e5a 139/* Invoke an out-of-line helper on 2 Zregs. */
c5edf07d 140static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
40e32e5a
RH
141 int rd, int rn, int data)
142{
c5edf07d
RH
143 if (fn == NULL) {
144 return false;
145 }
146 if (sve_access_check(s)) {
147 unsigned vsz = vec_full_reg_size(s);
148 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
149 vec_full_reg_offset(s, rn),
150 vsz, vsz, data, fn);
151 }
152 return true;
40e32e5a
RH
153}
154
e645d1a1 155/* Invoke an out-of-line helper on 3 Zregs. */
913a8a00 156static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
e645d1a1
RH
157 int rd, int rn, int rm, int data)
158{
913a8a00
RH
159 if (fn == NULL) {
160 return false;
161 }
162 if (sve_access_check(s)) {
163 unsigned vsz = vec_full_reg_size(s);
164 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
165 vec_full_reg_offset(s, rn),
166 vec_full_reg_offset(s, rm),
167 vsz, vsz, data, fn);
168 }
169 return true;
e645d1a1
RH
170}
171
84a272f5
RH
172static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
173 arg_rrr_esz *a, int data)
174{
175 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
176}
177
38650638 178/* Invoke an out-of-line helper on 4 Zregs. */
7ad416b1 179static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
38650638
RH
180 int rd, int rn, int rm, int ra, int data)
181{
7ad416b1
RH
182 if (fn == NULL) {
183 return false;
184 }
185 if (sve_access_check(s)) {
186 unsigned vsz = vec_full_reg_size(s);
187 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
188 vec_full_reg_offset(s, rn),
189 vec_full_reg_offset(s, rm),
190 vec_full_reg_offset(s, ra),
191 vsz, vsz, data, fn);
192 }
193 return true;
38650638
RH
194}
195
cab79ac9
RH
196static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
197 arg_rrrr_esz *a, int data)
198{
199 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
200}
201
e82d3536
RH
202static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
203 arg_rrxr_esz *a)
204{
205 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
206}
207
41bf9b67
RH
208/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
209static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
210 int rd, int rn, int rm, int ra,
211 int data, TCGv_ptr ptr)
212{
213 if (fn == NULL) {
214 return false;
215 }
216 if (sve_access_check(s)) {
217 unsigned vsz = vec_full_reg_size(s);
218 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
219 vec_full_reg_offset(s, rn),
220 vec_full_reg_offset(s, rm),
221 vec_full_reg_offset(s, ra),
222 ptr, vsz, vsz, data, fn);
223 }
224 return true;
225}
226
227static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
228 int rd, int rn, int rm, int ra,
229 int data, ARMFPStatusFlavour flavour)
230{
231 TCGv_ptr status = fpstatus_ptr(flavour);
232 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
233 tcg_temp_free_ptr(status);
234 return ret;
235}
236
96a461f7 237/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
8fb27a21 238static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
96a461f7
RH
239 int rd, int rn, int pg, int data)
240{
8fb27a21
RH
241 if (fn == NULL) {
242 return false;
243 }
244 if (sve_access_check(s)) {
245 unsigned vsz = vec_full_reg_size(s);
246 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
247 vec_full_reg_offset(s, rn),
248 pred_full_reg_offset(s, pg),
249 vsz, vsz, data, fn);
250 }
251 return true;
96a461f7
RH
252}
253
b051809a
RH
254static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
255 arg_rpr_esz *a, int data)
256{
257 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
258}
259
afa2529c
RH
260static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
261 arg_rpri_esz *a)
262{
263 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
264}
b051809a 265
36cbb7a8 266/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
2a753d1e 267static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
36cbb7a8
RH
268 int rd, int rn, int rm, int pg, int data)
269{
2a753d1e
RH
270 if (fn == NULL) {
271 return false;
272 }
273 if (sve_access_check(s)) {
274 unsigned vsz = vec_full_reg_size(s);
275 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
276 vec_full_reg_offset(s, rn),
277 vec_full_reg_offset(s, rm),
278 pred_full_reg_offset(s, pg),
279 vsz, vsz, data, fn);
280 }
281 return true;
36cbb7a8 282}
f7d79c41 283
312016c9
RH
284static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
285 arg_rprr_esz *a, int data)
286{
287 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
288}
289
faf915e2
RH
290/* Invoke a vector expander on two Zregs and an immediate. */
291static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
292 int esz, int rd, int rn, uint64_t imm)
293{
294 if (gvec_fn == NULL) {
295 return false;
296 }
297 if (sve_access_check(s)) {
298 unsigned vsz = vec_full_reg_size(s);
299 gvec_fn(esz, vec_full_reg_offset(s, rd),
300 vec_full_reg_offset(s, rn), imm, vsz, vsz);
301 }
302 return true;
303}
304
ada378f0
RH
305static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
306 arg_rri_esz *a)
307{
308 if (a->esz < 0) {
309 /* Invalid tsz encoding -- see tszimm_esz. */
310 return false;
311 }
312 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
313}
314
39eea561 315/* Invoke a vector expander on three Zregs. */
50f6db5f 316static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
28c4da31 317 int esz, int rd, int rn, int rm)
38388f7e 318{
50f6db5f
RH
319 if (gvec_fn == NULL) {
320 return false;
321 }
322 if (sve_access_check(s)) {
323 unsigned vsz = vec_full_reg_size(s);
324 gvec_fn(esz, vec_full_reg_offset(s, rd),
325 vec_full_reg_offset(s, rn),
326 vec_full_reg_offset(s, rm), vsz, vsz);
327 }
328 return true;
38388f7e
RH
329}
330
cd54bbe6
RH
331static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
332 arg_rrr_esz *a)
333{
334 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
335}
336
911cdc6d 337/* Invoke a vector expander on four Zregs. */
189876af
RH
338static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
339 arg_rrrr_esz *a)
911cdc6d 340{
189876af
RH
341 if (gvec_fn == NULL) {
342 return false;
343 }
344 if (sve_access_check(s)) {
345 unsigned vsz = vec_full_reg_size(s);
346 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
347 vec_full_reg_offset(s, a->rn),
348 vec_full_reg_offset(s, a->rm),
349 vec_full_reg_offset(s, a->ra), vsz, vsz);
350 }
351 return true;
911cdc6d
RH
352}
353
39eea561
RH
354/* Invoke a vector move on two Zregs. */
355static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 356{
f7d79c41 357 if (sve_access_check(s)) {
5f730621
RH
358 unsigned vsz = vec_full_reg_size(s);
359 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
360 vec_full_reg_offset(s, rn), vsz, vsz);
f7d79c41
RH
361 }
362 return true;
38388f7e
RH
363}
364
d9d78dcc
RH
365/* Initialize a Zreg with replications of a 64-bit immediate. */
366static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
367{
368 unsigned vsz = vec_full_reg_size(s);
8711e71f 369 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
d9d78dcc
RH
370}
371
516e246a 372/* Invoke a vector expander on three Pregs. */
dd81a8d7
RH
373static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
374 int rd, int rn, int rm)
516e246a 375{
dd81a8d7
RH
376 unsigned psz = pred_gvec_reg_size(s);
377 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
378 pred_full_reg_offset(s, rn),
379 pred_full_reg_offset(s, rm), psz, psz);
516e246a
RH
380}
381
382/* Invoke a vector move on two Pregs. */
383static bool do_mov_p(DisasContext *s, int rd, int rn)
384{
d0b2df5a
RH
385 if (sve_access_check(s)) {
386 unsigned psz = pred_gvec_reg_size(s);
387 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
388 pred_full_reg_offset(s, rn), psz, psz);
389 }
390 return true;
516e246a
RH
391}
392
9e18d7a6
RH
393/* Set the cpu flags as per a return from an SVE helper. */
394static void do_pred_flags(TCGv_i32 t)
395{
396 tcg_gen_mov_i32(cpu_NF, t);
397 tcg_gen_andi_i32(cpu_ZF, t, 2);
398 tcg_gen_andi_i32(cpu_CF, t, 1);
399 tcg_gen_movi_i32(cpu_VF, 0);
400}
401
402/* Subroutines computing the ARM PredTest psuedofunction. */
403static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
404{
405 TCGv_i32 t = tcg_temp_new_i32();
406
407 gen_helper_sve_predtest1(t, d, g);
408 do_pred_flags(t);
409 tcg_temp_free_i32(t);
410}
411
412static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
413{
414 TCGv_ptr dptr = tcg_temp_new_ptr();
415 TCGv_ptr gptr = tcg_temp_new_ptr();
392acacc 416 TCGv_i32 t = tcg_temp_new_i32();
9e18d7a6
RH
417
418 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
419 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
9e18d7a6 420
392acacc 421 gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
9e18d7a6
RH
422 tcg_temp_free_ptr(dptr);
423 tcg_temp_free_ptr(gptr);
424
425 do_pred_flags(t);
426 tcg_temp_free_i32(t);
427}
428
028e2a7b
RH
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull,          /* MO_8:  every bit */
    0x5555555555555555ull,          /* MO_16: every 2nd bit */
    0x1111111111111111ull,          /* MO_32: every 4th bit */
    0x0101010101010101ull,          /* MO_64: every 8th bit */
};
434
c437c59b
RH
435static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
436{
437 unallocated_encoding(s);
438 return true;
439}
440
39eea561
RH
441/*
442 *** SVE Logical - Unpredicated Group
443 */
444
b262215b
RH
445TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
446TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
447TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
448TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
d1822297 449
e6eba6e5
RH
450static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
451{
452 TCGv_i64 t = tcg_temp_new_i64();
453 uint64_t mask = dup_const(MO_8, 0xff >> sh);
454
455 tcg_gen_xor_i64(t, n, m);
456 tcg_gen_shri_i64(d, t, sh);
457 tcg_gen_shli_i64(t, t, 8 - sh);
458 tcg_gen_andi_i64(d, d, mask);
459 tcg_gen_andi_i64(t, t, ~mask);
460 tcg_gen_or_i64(d, d, t);
461 tcg_temp_free_i64(t);
462}
463
464static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
465{
466 TCGv_i64 t = tcg_temp_new_i64();
467 uint64_t mask = dup_const(MO_16, 0xffff >> sh);
468
469 tcg_gen_xor_i64(t, n, m);
470 tcg_gen_shri_i64(d, t, sh);
471 tcg_gen_shli_i64(t, t, 16 - sh);
472 tcg_gen_andi_i64(d, d, mask);
473 tcg_gen_andi_i64(t, t, ~mask);
474 tcg_gen_or_i64(d, d, t);
475 tcg_temp_free_i64(t);
476}
477
478static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
479{
480 tcg_gen_xor_i32(d, n, m);
481 tcg_gen_rotri_i32(d, d, sh);
482}
483
484static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
485{
486 tcg_gen_xor_i64(d, n, m);
487 tcg_gen_rotri_i64(d, d, sh);
488}
489
490static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
491 TCGv_vec m, int64_t sh)
492{
493 tcg_gen_xor_vec(vece, d, n, m);
494 tcg_gen_rotri_vec(vece, d, d, sh);
495}
496
497void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
498 uint32_t rm_ofs, int64_t shift,
499 uint32_t opr_sz, uint32_t max_sz)
500{
501 static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
502 static const GVecGen3i ops[4] = {
503 { .fni8 = gen_xar8_i64,
504 .fniv = gen_xar_vec,
505 .fno = gen_helper_sve2_xar_b,
506 .opt_opc = vecop,
507 .vece = MO_8 },
508 { .fni8 = gen_xar16_i64,
509 .fniv = gen_xar_vec,
510 .fno = gen_helper_sve2_xar_h,
511 .opt_opc = vecop,
512 .vece = MO_16 },
513 { .fni4 = gen_xar_i32,
514 .fniv = gen_xar_vec,
515 .fno = gen_helper_sve2_xar_s,
516 .opt_opc = vecop,
517 .vece = MO_32 },
518 { .fni8 = gen_xar_i64,
519 .fniv = gen_xar_vec,
520 .fno = gen_helper_gvec_xar_d,
521 .opt_opc = vecop,
522 .vece = MO_64 }
523 };
524 int esize = 8 << vece;
525
526 /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
527 tcg_debug_assert(shift >= 0);
528 tcg_debug_assert(shift <= esize);
529 shift &= esize - 1;
530
531 if (shift == 0) {
532 /* xar with no rotate devolves to xor. */
533 tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
534 } else {
535 tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
536 shift, &ops[vece]);
537 }
538}
539
540static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
541{
542 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
543 return false;
544 }
545 if (sve_access_check(s)) {
546 unsigned vsz = vec_full_reg_size(s);
547 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
548 vec_full_reg_offset(s, a->rn),
549 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
550 }
551 return true;
552}
553
911cdc6d
RH
554static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
555{
556 tcg_gen_xor_i64(d, n, m);
557 tcg_gen_xor_i64(d, d, k);
558}
559
560static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
561 TCGv_vec m, TCGv_vec k)
562{
563 tcg_gen_xor_vec(vece, d, n, m);
564 tcg_gen_xor_vec(vece, d, d, k);
565}
566
567static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
568 uint32_t a, uint32_t oprsz, uint32_t maxsz)
569{
570 static const GVecGen4 op = {
571 .fni8 = gen_eor3_i64,
572 .fniv = gen_eor3_vec,
573 .fno = gen_helper_sve2_eor3,
574 .vece = MO_64,
575 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
576 };
577 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
578}
579
b773a5c8 580TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)
911cdc6d
RH
581
582static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
583{
584 tcg_gen_andc_i64(d, m, k);
585 tcg_gen_xor_i64(d, d, n);
586}
587
588static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
589 TCGv_vec m, TCGv_vec k)
590{
591 tcg_gen_andc_vec(vece, d, m, k);
592 tcg_gen_xor_vec(vece, d, d, n);
593}
594
595static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
596 uint32_t a, uint32_t oprsz, uint32_t maxsz)
597{
598 static const GVecGen4 op = {
599 .fni8 = gen_bcax_i64,
600 .fniv = gen_bcax_vec,
601 .fno = gen_helper_sve2_bcax,
602 .vece = MO_64,
603 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
604 };
605 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
606}
607
b773a5c8 608TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)
911cdc6d
RH
609
610static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
611 uint32_t a, uint32_t oprsz, uint32_t maxsz)
612{
613 /* BSL differs from the generic bitsel in argument ordering. */
614 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
615}
616
b773a5c8 617TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
911cdc6d
RH
618
619static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
620{
621 tcg_gen_andc_i64(n, k, n);
622 tcg_gen_andc_i64(m, m, k);
623 tcg_gen_or_i64(d, n, m);
624}
625
626static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
627 TCGv_vec m, TCGv_vec k)
628{
629 if (TCG_TARGET_HAS_bitsel_vec) {
630 tcg_gen_not_vec(vece, n, n);
631 tcg_gen_bitsel_vec(vece, d, k, n, m);
632 } else {
633 tcg_gen_andc_vec(vece, n, k, n);
634 tcg_gen_andc_vec(vece, m, m, k);
635 tcg_gen_or_vec(vece, d, n, m);
636 }
637}
638
639static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
640 uint32_t a, uint32_t oprsz, uint32_t maxsz)
641{
642 static const GVecGen4 op = {
643 .fni8 = gen_bsl1n_i64,
644 .fniv = gen_bsl1n_vec,
645 .fno = gen_helper_sve2_bsl1n,
646 .vece = MO_64,
647 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
648 };
649 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
650}
651
b773a5c8 652TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
911cdc6d
RH
653
654static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
655{
656 /*
657 * Z[dn] = (n & k) | (~m & ~k)
658 * = | ~(m | k)
659 */
660 tcg_gen_and_i64(n, n, k);
661 if (TCG_TARGET_HAS_orc_i64) {
662 tcg_gen_or_i64(m, m, k);
663 tcg_gen_orc_i64(d, n, m);
664 } else {
665 tcg_gen_nor_i64(m, m, k);
666 tcg_gen_or_i64(d, n, m);
667 }
668}
669
670static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
671 TCGv_vec m, TCGv_vec k)
672{
673 if (TCG_TARGET_HAS_bitsel_vec) {
674 tcg_gen_not_vec(vece, m, m);
675 tcg_gen_bitsel_vec(vece, d, k, n, m);
676 } else {
677 tcg_gen_and_vec(vece, n, n, k);
678 tcg_gen_or_vec(vece, m, m, k);
679 tcg_gen_orc_vec(vece, d, n, m);
680 }
681}
682
683static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
684 uint32_t a, uint32_t oprsz, uint32_t maxsz)
685{
686 static const GVecGen4 op = {
687 .fni8 = gen_bsl2n_i64,
688 .fniv = gen_bsl2n_vec,
689 .fno = gen_helper_sve2_bsl2n,
690 .vece = MO_64,
691 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
692 };
693 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
694}
695
b773a5c8 696TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
911cdc6d
RH
697
698static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
699{
700 tcg_gen_and_i64(n, n, k);
701 tcg_gen_andc_i64(m, m, k);
702 tcg_gen_nor_i64(d, n, m);
703}
704
705static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
706 TCGv_vec m, TCGv_vec k)
707{
708 tcg_gen_bitsel_vec(vece, d, k, n, m);
709 tcg_gen_not_vec(vece, d, d);
710}
711
712static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
713 uint32_t a, uint32_t oprsz, uint32_t maxsz)
714{
715 static const GVecGen4 op = {
716 .fni8 = gen_nbsl_i64,
717 .fniv = gen_nbsl_vec,
718 .fno = gen_helper_sve2_nbsl,
719 .vece = MO_64,
720 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
721 };
722 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
723}
724
b773a5c8 725TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
911cdc6d 726
fea98f9c
RH
727/*
728 *** SVE Integer Arithmetic - Unpredicated Group
729 */
730
b262215b
RH
731TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
732TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
733TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
734TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
735TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
736TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
fea98f9c 737
f97cfd59
RH
738/*
739 *** SVE Integer Arithmetic - Binary Predicated Group
740 */
741
a2103582
RH
742/* Select active elememnts from Zn and inactive elements from Zm,
743 * storing the result in Zd.
744 */
68cc4ee3 745static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
a2103582
RH
746{
747 static gen_helper_gvec_4 * const fns[4] = {
748 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
749 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
750 };
68cc4ee3 751 return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
a2103582
RH
752}
753
8e7acb24
RH
754#define DO_ZPZZ(NAME, FEAT, name) \
755 static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \
756 gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \
757 gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \
f97cfd59 758 }; \
8e7acb24
RH
759 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \
760 name##_zpzz_fns[a->esz], a, 0)
761
762DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
763DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
764DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
765DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)
766
767DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
768DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)
769
770DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
771DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
772DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
773DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
774DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
775DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)
776
777DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
778DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
779DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)
780
781DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
782DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
783DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)
784
785static gen_helper_gvec_4 * const sdiv_fns[4] = {
786 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
787};
788TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)
f97cfd59 789
8e7acb24
RH
790static gen_helper_gvec_4 * const udiv_fns[4] = {
791 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
792};
793TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)
f97cfd59 794
3a7be554 795static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
a2103582 796{
68cc4ee3 797 return do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
a2103582 798}
d3fe4a29 799
afac6d04
RH
800/*
801 *** SVE Integer Arithmetic - Unary Predicated Group
802 */
803
817bd5c9
RH
804#define DO_ZPZ(NAME, FEAT, name) \
805 static gen_helper_gvec_3 * const name##_fns[4] = { \
806 gen_helper_##name##_b, gen_helper_##name##_h, \
807 gen_helper_##name##_s, gen_helper_##name##_d, \
afac6d04 808 }; \
817bd5c9
RH
809 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)
810
811DO_ZPZ(CLS, aa64_sve, sve_cls)
812DO_ZPZ(CLZ, aa64_sve, sve_clz)
813DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
814DO_ZPZ(CNOT, aa64_sve, sve_cnot)
815DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
816DO_ZPZ(ABS, aa64_sve, sve_abs)
817DO_ZPZ(NEG, aa64_sve, sve_neg)
818DO_ZPZ(RBIT, aa64_sve, sve_rbit)
819
820static gen_helper_gvec_3 * const fabs_fns[4] = {
821 NULL, gen_helper_sve_fabs_h,
822 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
823};
824TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
afac6d04 825
817bd5c9
RH
826static gen_helper_gvec_3 * const fneg_fns[4] = {
827 NULL, gen_helper_sve_fneg_h,
828 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
829};
830TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
afac6d04 831
817bd5c9
RH
832static gen_helper_gvec_3 * const sxtb_fns[4] = {
833 NULL, gen_helper_sve_sxtb_h,
834 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
835};
836TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)
afac6d04 837
817bd5c9
RH
838static gen_helper_gvec_3 * const uxtb_fns[4] = {
839 NULL, gen_helper_sve_uxtb_h,
840 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
841};
842TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)
afac6d04 843
817bd5c9
RH
844static gen_helper_gvec_3 * const sxth_fns[4] = {
845 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
846};
847TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)
afac6d04 848
817bd5c9
RH
849static gen_helper_gvec_3 * const uxth_fns[4] = {
850 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
851};
852TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)
afac6d04 853
817bd5c9
RH
854TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
855 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
856TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
857 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
afac6d04 858
047cec97
RH
859/*
860 *** SVE Integer Reduction Group
861 */
862
863typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
864static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
865 gen_helper_gvec_reduc *fn)
866{
867 unsigned vsz = vec_full_reg_size(s);
868 TCGv_ptr t_zn, t_pg;
869 TCGv_i32 desc;
870 TCGv_i64 temp;
871
872 if (fn == NULL) {
873 return false;
874 }
875 if (!sve_access_check(s)) {
876 return true;
877 }
878
c6a59b55 879 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
047cec97
RH
880 temp = tcg_temp_new_i64();
881 t_zn = tcg_temp_new_ptr();
882 t_pg = tcg_temp_new_ptr();
883
884 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
885 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
886 fn(temp, t_zn, t_pg, desc);
887 tcg_temp_free_ptr(t_zn);
888 tcg_temp_free_ptr(t_pg);
047cec97
RH
889
890 write_fp_dreg(s, a->rd, temp);
891 tcg_temp_free_i64(temp);
892 return true;
893}
894
895#define DO_VPZ(NAME, name) \
9ac24f1f 896 static gen_helper_gvec_reduc * const name##_fns[4] = { \
047cec97
RH
897 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
898 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
899 }; \
9ac24f1f 900 TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])
047cec97
RH
901
902DO_VPZ(ORV, orv)
903DO_VPZ(ANDV, andv)
904DO_VPZ(EORV, eorv)
905
906DO_VPZ(UADDV, uaddv)
907DO_VPZ(SMAXV, smaxv)
908DO_VPZ(UMAXV, umaxv)
909DO_VPZ(SMINV, sminv)
910DO_VPZ(UMINV, uminv)
911
9ac24f1f
RH
912static gen_helper_gvec_reduc * const saddv_fns[4] = {
913 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
914 gen_helper_sve_saddv_s, NULL
915};
916TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])
047cec97
RH
917
918#undef DO_VPZ
919
ccd841c3
RH
920/*
921 *** SVE Shift by Immediate - Predicated Group
922 */
923
60245996
RH
924/*
925 * Copy Zn into Zd, storing zeros into inactive elements.
926 * If invert, store zeros into the active elements.
ccd841c3 927 */
60245996
RH
928static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
929 int esz, bool invert)
ccd841c3 930{
60245996
RH
931 static gen_helper_gvec_3 * const fns[4] = {
932 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
933 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
ccd841c3 934 };
8fb27a21 935 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
ccd841c3
RH
936}
937
73c558a8
RH
938static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
939 gen_helper_gvec_3 * const fns[4])
ccd841c3 940{
73c558a8
RH
941 int max;
942
ccd841c3
RH
943 if (a->esz < 0) {
944 /* Invalid tsz encoding -- see tszimm_esz. */
945 return false;
946 }
73c558a8
RH
947
948 /*
949 * Shift by element size is architecturally valid.
950 * For arithmetic right-shift, it's the same as by one less.
951 * For logical shifts and ASRD, it is a zeroing operation.
952 */
953 max = 8 << a->esz;
954 if (a->imm >= max) {
955 if (asr) {
956 a->imm = max - 1;
957 } else {
958 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
959 }
960 }
afa2529c 961 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
962}
963
/* Helper tables indexed by element size (b/h/s/d) for the predicated
 * shift-by-immediate instructions; dispatched via TRANS_FEAT.
 */
static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

/* SVE2 saturating/rounding shift-by-immediate: these go straight to
 * gen_gvec_ool_arg_zpzi; esz < 0 (invalid tsz) yields a NULL fn,
 * which the expander rejects.
 */
static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
a5421b54 1022
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/* Predicated shift by wide (64-bit) elements.  There is no _d helper:
 * a doubleword shift-by-wide is just the normal doubleword shift,
 * hence the NULL in the table for esz == MO_64.
 */
#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
        gen_helper_sve_##name##_zpzw_s, NULL                          \
    };                                                                \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,          \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1040
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

/*
 * Expand an unpredicated shift-by-immediate via a gvec inline fn.
 * Note that a->imm may be rewritten in place for the clamped ASR case.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)
d9d78dcc 1075
/* Unpredicated shift by wide elements; as with DO_ZPZW there is no
 * doubleword variant, so the table holds NULL for esz == MO_64.
 */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {            \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
        gen_helper_sve_##name##_zzw_s, NULL                           \
    };                                                                \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                  \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1089
/*
 *** SVE Integer Multiply-Add Group
 */

/*
 * Expand a predicated 3-source multiply-add (Zd = Za +/- Zn * Zm)
 * through an out-of-line helper taking rd, ra, rn, rm and pg.
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
96a36e4a 1120
/*
 *** SVE Index Generation Group
 */

/*
 * Expand INDEX: Zd.<esz>[i] = start + i * incr.  The doubleword form
 * passes the 64-bit operands straight through; narrower element sizes
 * truncate both operands to 32 bits first.
 */
static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    return true;
}

/* The four INDEX forms: immediate/immediate, immediate/register,
 * register/immediate, register/register.
 */
TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
9a56c9c3 1172
96f922cc
RH
1173/*
1174 *** SVE Stack Allocation Group
1175 */
1176
3a7be554 1177static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1178{
5de56742
AC
1179 if (sve_access_check(s)) {
1180 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1181 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1182 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1183 }
96f922cc
RH
1184 return true;
1185}
1186
3a7be554 1187static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1188{
5de56742
AC
1189 if (sve_access_check(s)) {
1190 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1191 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1192 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1193 }
96f922cc
RH
1194 return true;
1195}
1196
3a7be554 1197static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1198{
5de56742
AC
1199 if (sve_access_check(s)) {
1200 TCGv_i64 reg = cpu_reg(s, a->rd);
1201 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1202 }
96f922cc
RH
1203 return true;
1204}
1205
4b242d9c
RH
/*
 *** SVE Compute Vector Address Group
 */

/* Expand ADR via an out-of-line helper; a->imm is the shift amount
 * applied to the offset register, passed through simd_data.
 */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
4b242d9c 1219
/*
 *** SVE Integer Misc - Unpredicated Group
 */

/* FEXPA and FTSSEL have no byte-sized form, hence the NULL at esz 0. */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
a1f233f2 1236
516e246a
RH
/*
 *** SVE Predicate Logical Operations Group
 */

/*
 * Expand a predicate logical operation, optionally setting NZCV from
 * a PTEST of the result (a->s).  The non-flag-setting path is a plain
 * gvec expansion; the flag-setting path either works in i64 temps
 * (when the predicate fits in one word) or re-runs PTEST over the
 * stored result.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1296
/* AND_pppp expansion fragments: pd = (pn & pm) & pg. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Without flag setting, special-case operand aliasing:
     * pn == pm reduces to a 2-operand AND (or a plain move when the
     * guard is also one of the sources).
     */
    if (!a->s) {
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                do_mov_p(s, a->rd, a->rn);
            } else {
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
1337
/* BIC_pppp expansion fragments: pd = (pn & ~pm) & pg. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* When pg aliases pn and no flags are required, the guard is
     * redundant and this reduces to a 2-operand ANDC.
     */
    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}
1368
/* EOR_pppp expansion fragments: pd = (pn ^ pm) & pg. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1392
/* SEL (predicates): pd = pg ? pn : pm, expanded as a bitwise bitsel.
 * The flag-setting form does not exist; a->s is invalid.
 */
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}
1407
/* ORR_pppp expansion fragments: pd = (pn | pm) & pg. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* pg == pn == pm without flags is MOV (the canonical predicate
     * register-move encoding).
     */
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1435
/* ORN_pppp expansion fragments: pd = (pn | ~pm) & pg. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1459
/* NOR_pppp expansion fragments: pd = pg & ~(pn | pm). */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1483
/* NAND_pppp expansion fragments: pd = pg & ~(pn & pm). */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1507
9e18d7a6
RH
1508/*
1509 *** SVE Predicate Misc Group
1510 */
1511
/* PTEST: set NZCV from pg & pn.  The single-word case is done inline
 * in i64 temps; larger predicates go through do_predtest.
 */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1535
028e2a7b
RH
/* See the ARM pseudocode DecodePredCount.
 *
 * Map a predicate-constraint @pattern to the number of active
 * elements for a vector of @fullsz bytes with elements of size
 * 1 << @esz; an unsatisfiable VL<n> pattern yields 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {                       /* POW2 */
        return pow2floor(elements);
    }
    if (pattern >= 0x1 && pattern <= 0x8) {     /* VL1 .. VL8 */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {  /* VL16 .. VL256 */
        bound = 16 << (pattern - 9);
    } else if (pattern == 0x1d) {               /* MUL4 */
        return elements - elements % 4;
    } else if (pattern == 0x1e) {               /* MUL3 */
        return elements - elements % 3;
    } else if (pattern == 0x1f) {               /* ALL */
        return elements;
    } else {                                    /* #uimm5 */
        return 0;
    }
    /* Fixed VL<n> patterns apply only if the vector is long enough. */
    return elements >= bound ? bound : 0;
}
1573
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Partial final word: keep only the low setsz%64 bits. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* Uniform contents: try a single gvec dup over the set area. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Store full words, then the partial word, then zero the rest. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1653
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
028e2a7b 1661
/* RDFFR (predicated): pd = pg & FFR, optionally setting flags. */
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* Unpredicated FFR read/write are plain predicate moves. */
TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
028e2a7b
RH
1676
/*
 * Common expansion for PFIRST/PNEXT: call the helper with pointers to
 * Pd and Pg and a PREDDESC descriptor, then set NZCV from its result.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
028e2a7b 1708
24e82e68
RH
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtraction can only underflow; clamp from below. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Addition can only overflow; clamp from above. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1737
/* Similarly with 64-bit values.  The unsigned cases use a movcond on
 * the borrow/carry; the signed cases detect overflow via the classic
 * xor-of-signs tests before clamping to INT64_MIN/INT64_MAX.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: if reg < val the true result is
               negative, so saturate to 0. */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: wrap-around (sum < addend) saturates to ~0. */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1783
/* Similarly with a vector and a scalar operand.  Decrement is handled
 * by negating the (positive) scalar and always calling the "add"
 * helper, except for unsigned doubleword where a dedicated subtract
 * helper exists.
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                /* Negating a 64-bit value could itself overflow,
                   so use the dedicated unsigned subtract helper. */
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
1867
3a7be554 1868static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
1869{
1870 if (sve_access_check(s)) {
1871 unsigned fullsz = vec_full_reg_size(s);
1872 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1873 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1874 }
1875 return true;
1876}
1877
3a7be554 1878static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1879{
1880 if (sve_access_check(s)) {
1881 unsigned fullsz = vec_full_reg_size(s);
1882 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1883 int inc = numelem * a->imm * (a->d ? -1 : 1);
1884 TCGv_i64 reg = cpu_reg(s, a->rd);
1885
1886 tcg_gen_addi_i64(reg, reg, inc);
1887 }
1888 return true;
1889}
1890
/* SQINC/SQDEC/UQINC/UQDEC (scalar, 32-bit): saturating count update
 * within the low 32 bits; a zero increment still performs the
 * sign/zero extension of the 32-bit result.
 */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
1914
/* SQINC/SQDEC/UQINC/UQDEC (scalar, 64-bit): saturating count update
 * of the whole register; a zero increment is a no-op.
 */
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
1931
/* INC/DEC (vector): Zd = Zn +/- count * imm per element.
 * No byte-element form exists (esz == 0 is invalid).
 */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        /* Zero increment degenerates to a register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1954
/* SQINC/SQDEC/UQINC/UQDEC (vector): saturating per-element update.
 * No byte-element form exists (esz == 0 is invalid).
 */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        /* Zero increment degenerates to a register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1975
/*
 *** SVE Bitwise Immediate Group
 */

/* Decode the 13-bit logical-immediate field and expand the operation
 * as a 64-bit gvec immediate op; reject unrepresentable encodings.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
}

TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
/* DUPM: broadcast a decoded logical immediate to every element. */
static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
2008
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 * Copies @val into the active elements of Zd, merging with Zn.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
}
2040
/* FCPY: merging copy of a VFP-encoded floating-point immediate. */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        /* No byte-sized FP elements. */
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}
2053
3a7be554 2054static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 2055{
f25a2361 2056 if (sve_access_check(s)) {
e152b48b 2057 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
f25a2361
RH
2058 }
2059 return true;
2060}
2061
/* CPY (immediate), zeroing: active elements get imm, inactive get 0. */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2078
b94f8f60
RH
2079/*
2080 *** SVE Permute Extract Group
2081 */
2082
/*
 * EXT: extract a vector from the byte-concatenation {Zm:Zn}, starting
 * at byte offset IMM.  An out-of-range IMM is clamped to 0, per the
 * architectural behavior of selecting only from Zn in that case.
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper. */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2112
/* SVE2 EXT (constructive) uses the pair Zn:Z(n+1) as the source. */
TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
75114792 2115
30562ab7
RH
2116/*
2117 *** SVE Permute - Unpredicated Group
2118 */
2119
/* DUP (scalar): broadcast a general register (SP allowed) to all elements. */
static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}
2129
/*
 * DUP (indexed): broadcast element [index] of Zn.  The imm field encodes
 * both the element size (trailing-zero count) and the index; an index
 * beyond the current vector length yields zero.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        /* All-zero size bits: unallocated encoding. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2156
/*
 * INSR: shift Zn up by one element and insert VAL at element 0,
 * via the per-element-size out-of-line helper.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
}
2177
/* INSR (SIMD&FP scalar): insert the low 64 bits of Vm at element 0. */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
2188
3a7be554 2189static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2190{
2191 if (sve_access_check(s)) {
2192 do_insr_i64(s, a, cpu_reg(s, a->rm));
2193 }
2194 return true;
2195}
2196
/* REV (vector): reverse the order of all elements. */
static gen_helper_gvec_2 * const rev_fns[4] = {
    gen_helper_sve_rev_b, gen_helper_sve_rev_h,
    gen_helper_sve_rev_s, gen_helper_sve_rev_d
};
TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
30562ab7 2202
/* TBL: table lookup, out-of-range indices produce zero. */
static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
    gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
    gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
};
TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)

/* SVE2 TBL: two-source table lookup over the pair Zn:Z(n+1). */
static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
    gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
    gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
};
TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
           a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)

/* TBX: table lookup, out-of-range indices leave Zd unchanged. */
static gen_helper_gvec_3 * const tbx_fns[4] = {
    gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
    gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
};
TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
80a712a2 2221
/*
 * SUNPK/UUNPK: widen the low (h=0) or high (h=1) half of Zn to
 * double-width elements, sign- (u=0) or zero- (u=1) extending.
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        /* No destination narrower than halfwords. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Source offset selects the low or high half of Zn. */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2243
d731d8cb
RH
2244/*
2245 *** SVE Permute - Predicates Group
2246 */
2247
/*
 * Common expansion for three-operand predicate permutes (ZIP/UZP/TRN).
 * Predicate registers are smaller than simd_desc allows, so the
 * descriptor is built with the PREDDESC fields; HIGH_ODD selects the
 * high-half / odd-element variant of the operation.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    uint32_t desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));

    fn(t_d, t_n, t_m, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    return true;
}
2277
/*
 * Common expansion for two-operand predicate permutes (REV/PUNPK).
 * As with do_perm_pred3, uses a PREDDESC descriptor because predicate
 * sizes cannot use simd_desc.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(t_d, t_n, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2303
/* Predicate permutes: the bool argument selects low/high or even/odd. */
TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)

TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
d731d8cb 2314
234b48e9
RH
2315/*
2316 *** SVE Permute - Interleaving Group
2317 */
2318
/*
 * Vector interleaves.  For ZIP/UZP/TRN the helper's immediate selects
 * the starting offset (ZIP2 starts at the high half; UZP2/TRN2 start
 * one element in).  The _q forms (F64MM) operate on 128-bit quadwords.
 */
static gen_helper_gvec_3 * const zip_fns[4] = {
    gen_helper_sve_zip_b, gen_helper_sve_zip_h,
    gen_helper_sve_zip_s, gen_helper_sve_zip_d,
};
TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           zip_fns[a->esz], a, 0)
TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           zip_fns[a->esz], a, vec_full_reg_size(s) / 2)

TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_zip_q, a, 0)
TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_zip_q, a,
           QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 0)
TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 16)

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 0)
TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 0)
TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 16)
74b64b25 2363
3ca879ae
RH
2364/*
2365 *** SVE Permute Vector - Predicated Group
2366 */
2367
/* COMPACT: pack active elements to the low end (S and D only). */
static gen_helper_gvec_3 * const compact_fns[4] = {
    NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
};
TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
3ca879ae 2372
ef23cb72
RH
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size. This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_p);
}
2394
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two length: wrap with a mask. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select 0 once the offset reaches vsz. */
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}
2411
/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* Masking maps the "not found" negative value to the last element. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}
2425
/* Load an unsigned element of ESZ from BASE+OFS.
 * Returns a new i64 temp owned by the caller.
 */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
2449
/* Load an unsigned element of ESZ from RM[LAST].
 * LAST is a byte offset into the vector and is consumed (modified on
 * big-endian hosts); the caller still owns and frees it.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering. See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2475
/* Compute CLAST for a Zreg: broadcast the element after (or at, if
 * BEFORE) the last active element of Zm; if no element is active,
 * Zd takes the value of Zn (the MOVPRFX source).
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Local temp: the value must survive the brcond below. */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2523
/* CLASTA selects the element after the last active; CLASTB the last. */
TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
ef23cb72
RH
2526
/* Compute CLAST for a scalar: REG_VAL is updated in place with the
 * selected element, or left unchanged if no element is active.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage. We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
                        ele, reg_val);

    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2558
/* Compute CLAST for a Vreg: the FP destination keeps its own low
 * element as the "not found" fallback, then is written back zero-extended.
 */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}
2573
/* CLASTA/CLASTB (SIMD&FP scalar). */
TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
ef23cb72
RH
2576
/* Compute CLAST for a Xreg: the general register keeps its own
 * (element-size-truncated) value as the "not found" fallback.
 */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    /* Truncate the existing value to the element size. */
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
2606
/* CLASTA/CLASTB (scalar). */
TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
ef23cb72
RH
2609
/* Compute LAST for a scalar.  Unlike CLAST there is no "not found"
 * fallback: a negative index wraps to the last element of the vector.
 * Returns a new i64 temp owned by the caller.
 */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
2628
2629/* Compute LAST for a Vreg. */
2630static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2631{
2632 if (sve_access_check(s)) {
2633 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2634 write_fp_dreg(s, a->rd, val);
2635 tcg_temp_free_i64(val);
2636 }
2637 return true;
2638}
2639
/* LASTA/LASTB (SIMD&FP scalar). */
TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
ef23cb72
RH
2642
/* Compute LAST for a Xreg: write the selected element to Xd. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}
2653
/* LASTA/LASTB (scalar). */
TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
ef23cb72 2656
3a7be554 2657static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2658{
2659 if (sve_access_check(s)) {
2660 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2661 }
2662 return true;
2663}
2664
/* CPY (SIMD&FP scalar), merging: copy element 0 of Vn into active elements. */
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
2675
/* REVB/REVH/REVW: byte/halfword/word reversal within each element.
 * A NULL table entry marks an element size with nothing to reverse.
 */
static gen_helper_gvec_3 * const revb_fns[4] = {
    NULL, gen_helper_sve_revb_h,
    gen_helper_sve_revb_s, gen_helper_sve_revb_d,
};
TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const revh_fns[4] = {
    NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
};
TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)

TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)

/* SPLICE; the SVE2 constructive form sources the pair Zn:Z(n+1). */
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
           gen_helper_sve_splice, a, a->esz)

TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
           a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
75114792 2695
757f9cff
RH
/*
 *** SVE Integer Compare - Vectors Group
 */

/*
 * Common expansion for predicated vector-vector compares that also
 * set NZCV: call the flags-returning helper, then update the PSTATE
 * flags from its result via do_pred_flags.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        /* No helper for this element size: unallocated. */
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2738
/* Vector-vector compares; _ppzw forms compare against wide (D) elements,
 * so the D entry is NULL and the less-than forms exist only as _ppzw.
 */
#define DO_PPZZ(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = {       \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzz_fns[a->esz])

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

#define DO_PPZW(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = {       \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
        gen_helper_sve_##name##_ppzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzw_fns[a->esz])

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
2776
38cadeba
RH
/*
 *** SVE Integer Compare - Immediate Groups
 */

/*
 * Common expansion for predicated compare-with-immediate that also
 * sets NZCV; the immediate travels in the simd_desc data field.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2816
/* Compare-with-immediate expansions for all element sizes. */
#define DO_PPZI(NAME, name) \
    static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = {       \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a,                 \
               name##_ppzi_fns[a->esz])

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
2837
35da316f
RH
2838/*
2839 *** SVE Partition Break Group
2840 */
2841
/*
 * Common expansion for three-operand BRK insns (BRKPA/BRKPB), choosing
 * the flag-setting helper when a->s and updating NZCV from its result.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting variant: helper also returns the NZCV result. */
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, m, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, m, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    return true;
}
2877
/*
 * Common expansion for two-operand BRK insns (BRKA/BRKB/BRKN),
 * mirroring do_brk3 without the Pm operand.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    return true;
}
2910
/* Break-after/-before insns; each pairs a plain and a flag-setting helper. */
TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpa, gen_helper_sve_brkpas)
TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpb, gen_helper_sve_brkpbs)

TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)

TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)

TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
           gen_helper_sve_brkn, gen_helper_sve_brkns)
35da316f 2928
9ee3a611
RH
2929/*
2930 *** SVE Predicate Count Group
2931 */
2932
/*
 * Count the active elements of Pn governed by Pg, at element size ESZ,
 * into VAL.  When the whole predicate fits in 64 bits this is done
 * inline with an AND + mask + ctpop; otherwise via the helper.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}
2971
3a7be554 2972static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
2973{
2974 if (sve_access_check(s)) {
2975 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
2976 }
2977 return true;
2978}
2979
/* INCP/DECP (scalar): Xd +/-= count of active elements of Pg. */
static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}
2996
/* INCP/DECP (vector): each element of Zd +/-= active count of Pg. */
static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0) {
        /* No byte-element form. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
    }
    return true;
}
3013
/* SQINCP/UQINCP/SQDECP/UQDECP (scalar, 32-bit): saturating +/- count. */
static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}
3025
3a7be554 3026static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3027{
3028 if (sve_access_check(s)) {
3029 TCGv_i64 reg = cpu_reg(s, a->rd);
3030 TCGv_i64 val = tcg_temp_new_i64();
3031
3032 do_cntp(s, val, a->esz, a->pg, a->pg);
3033 do_sat_addsub_64(reg, val, a->u, a->d);
3034 }
3035 return true;
3036}
3037
/* SQINCP/UQINCP/SQDECP/UQDECP (vector): per-element saturating +/- count. */
static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0) {
        /* No byte-element form. */
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}
3050
caf1cefc
RH
3051/*
3052 *** SVE Integer Compare Scalars Group
3053 */
3054
/* CTERMEQ/CTERMNE: compare two scalars and set NZCV for the SVE loop
 * termination test.  N is the comparison result; C was set by a prior
 * WHILE; V is computed so that "cond NE" style tests read correctly. */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* NF = (rn cond rm) as 0/1 for now; expanded to bit 31 below. */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31, so negate the 0/1
     * values to replicate them into the sign bit. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3079
/* WHILE (LT/LE/LO/LS and the SVE2 GT/GE/HI/HS forms): construct a
 * predicate in Pd whose first K elements are true, where K is the
 * number of loop iterations for which the scalar condition holds. */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit operands: extend according to signedness. */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length.  This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
3190
14f6dad1
RH
3191static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3192{
3193 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3194 TCGv_i32 t2;
14f6dad1
RH
3195 TCGv_ptr ptr;
3196 unsigned vsz = vec_full_reg_size(s);
3197 unsigned desc = 0;
3198
3199 if (!dc_isar_feature(aa64_sve2, s)) {
3200 return false;
3201 }
3202 if (!sve_access_check(s)) {
3203 return true;
3204 }
3205
3206 op0 = read_cpu_reg(s, a->rn, 1);
3207 op1 = read_cpu_reg(s, a->rm, 1);
3208
4481bbf2 3209 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3210 diff = tcg_temp_new_i64();
3211
3212 if (a->rw) {
3213 /* WHILERW */
3214 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3215 t1 = tcg_temp_new_i64();
3216 tcg_gen_sub_i64(diff, op0, op1);
3217 tcg_gen_sub_i64(t1, op1, op0);
3218 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3219 tcg_temp_free_i64(t1);
3220 /* Round down to a multiple of ESIZE. */
3221 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3222 /* If op1 == op0, diff == 0, and the condition is always true. */
3223 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3224 } else {
3225 /* WHILEWR */
3226 tcg_gen_sub_i64(diff, op1, op0);
3227 /* Round down to a multiple of ESIZE. */
3228 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3229 /* If op0 >= op1, diff <= 0, the condition is always true. */
3230 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3231 }
3232
3233 /* Bound to the maximum. */
3234 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3235
3236 /* Since we're bounded, pass as a 32-bit type. */
3237 t2 = tcg_temp_new_i32();
3238 tcg_gen_extrl_i64_i32(t2, diff);
3239 tcg_temp_free_i64(diff);
3240
3241 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3242 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3243
3244 ptr = tcg_temp_new_ptr();
3245 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3246
4481bbf2 3247 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3248 do_pred_flags(t2);
3249
3250 tcg_temp_free_ptr(ptr);
3251 tcg_temp_free_i32(t2);
14f6dad1
RH
3252 return true;
3253}
3254
ed491961
RH
3255/*
3256 *** SVE Integer Wide Immediate - Unpredicated Group
3257 */
3258
3a7be554 3259static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3260{
3261 if (a->esz == 0) {
3262 return false;
3263 }
3264 if (sve_access_check(s)) {
3265 unsigned vsz = vec_full_reg_size(s);
3266 int dofs = vec_full_reg_offset(s, a->rd);
3267 uint64_t imm;
3268
3269 /* Decode the VFP immediate. */
3270 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3271 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3272 }
3273 return true;
3274}
3275
3a7be554 3276static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3277{
ed491961
RH
3278 if (sve_access_check(s)) {
3279 unsigned vsz = vec_full_reg_size(s);
3280 int dofs = vec_full_reg_offset(s, a->rd);
8711e71f 3281 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3282 }
3283 return true;
3284}
3285
/* ADD (immediate), expanded inline via the generic gvec addi. */
TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)

/* SUB (immediate): Zd = Zn - imm == Zn + (-imm), so reuse the ADD
 * expansion with the immediate negated in place. */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3293
/* SUBR (immediate): reversed subtract, Zd = imm - Zn.  Expanded with
 * the scalar (immediate) as the first operand of the vector sub. */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* Host vector ops required by the .fniv expansions below. */
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          /* .scalar_first places the immediate on the left of the sub. */
          .scalar_first = true }
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}
3333
/* MUL (immediate), expanded inline via the generic gvec muli. */
TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)

/* Shared expander for the saturating add/sub immediate forms below:
 * u selects unsigned saturation, d selects subtract. */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    if (sve_access_check(s)) {
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                          tcg_constant_i64(a->imm), u, d);
    }
    return true;
}

TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
6e6a157d
RH
3349
3350static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3351{
3352 if (sve_access_check(s)) {
3353 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3354 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3355 vec_full_reg_offset(s, a->rn),
138a1f7b 3356 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
6e6a157d
RH
3357 }
3358 return true;
3359}
3360
/* Expand [SU]{MAX,MIN} (immediate) via per-element-size OOL helpers. */
#define DO_ZZI(NAME, name) \
    static gen_helper_gvec_2i * const name##i_fns[4] = {        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
    };                                                          \
    TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3374
5f425b92
RH
/* Dot product (vectors): helper selected by [unsigned][size]. */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)

/*
 * SVE Multiply - Indexed
 */

TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

/* Mixed-sign dot product (indexed) is gated on the I8MM feature. */
TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)
16fcfdc7 3399
/* Two-operand indexed multiply: the register index is passed as the
 * descriptor data for the out-of-line helper. */
#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX

/* Widening indexed multiply: TOP selects the top (odd) elements and is
 * packed into bit 0 of the descriptor data alongside the index. */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB

/* Three-operand (multiply-accumulate) indexed ops. */
#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR

/* Widening indexed multiply-accumulate; TOP as for DO_SVE2_RRX_TB. */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB

/* Complex indexed ops: the rotation is packed into the low two bits
 * of the descriptor data alongside the register index. */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
3510
ca40a6e6
RH
3511/*
3512 *** SVE Floating Point Multiply-Add Indexed Group
3513 */
3514
/* Shared expander for FMLA/FMLS (indexed); sub selects the multiply-
 * subtract form and is packed into bit 0 of the descriptor data. */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[4] = {
        NULL,                          /* no byte-sized FP elements */
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };
    return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sub,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}
3527
0a82d963
RH
/* FMLA (indexed): Zd = Zd + Zn * Zm[index]. */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, false);
}

/* FMLS (indexed): Zd = Zd - Zn * Zm[index]. */
static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, true);
}
3537
ca40a6e6
RH
3538/*
3539 *** SVE Floating Point Multiply Indexed Group
3540 */
3541
3a7be554 3542static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
ca40a6e6
RH
3543{
3544 static gen_helper_gvec_3_ptr * const fns[3] = {
3545 gen_helper_gvec_fmul_idx_h,
3546 gen_helper_gvec_fmul_idx_s,
3547 gen_helper_gvec_fmul_idx_d,
3548 };
3549
3550 if (sve_access_check(s)) {
3551 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3552 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3553 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3554 vec_full_reg_offset(s, a->rn),
3555 vec_full_reg_offset(s, a->rm),
3556 status, vsz, vsz, a->index, fns[a->esz - 1]);
3557 tcg_temp_free_ptr(status);
3558 }
3559 return true;
3560}
3561
23fbe79f
RH
3562/*
3563 *** SVE Floating Point Fast Reduction Group
3564 */
3565
typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/* Expand a fast (tree-wise) FP reduction of Zn into scalar Vd.
 * The descriptor's extra data carries the vector size rounded up to a
 * power of two (p2vsz), used by the helper for the reduction tree. */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);

    /* Scalar result; the high bits of Vd are zeroed. */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
3594
/* Expand the fast FP reductions; byte element size is invalid. */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
{                                                         \
    static gen_helper_fp_reduce * const fns[3] = {        \
        gen_helper_sve_##name##_h,                        \
        gen_helper_sve_##name##_s,                        \
        gen_helper_sve_##name##_d,                        \
    };                                                    \
    if (a->esz == 0) {                                    \
        return false;                                     \
    }                                                     \
    if (sve_access_check(s)) {                            \
        do_reduce(s, a, fns[a->esz - 1]);                 \
    }                                                     \
    return true;                                          \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
3617
3887c038
RH
3618/*
3619 *** SVE Floating Point Unary Operations - Unpredicated Group
3620 */
3621
3622static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3623{
3624 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3625 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
3626
3627 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3628 vec_full_reg_offset(s, a->rn),
3629 status, vsz, vsz, 0, fn);
3630 tcg_temp_free_ptr(status);
3631}
3632
3a7be554 3633static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3634{
3635 static gen_helper_gvec_2_ptr * const fns[3] = {
3636 gen_helper_gvec_frecpe_h,
3637 gen_helper_gvec_frecpe_s,
3638 gen_helper_gvec_frecpe_d,
3639 };
3640 if (a->esz == 0) {
3641 return false;
3642 }
3643 if (sve_access_check(s)) {
3644 do_zz_fp(s, a, fns[a->esz - 1]);
3645 }
3646 return true;
3647}
3648
3a7be554 3649static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3650{
3651 static gen_helper_gvec_2_ptr * const fns[3] = {
3652 gen_helper_gvec_frsqrte_h,
3653 gen_helper_gvec_frsqrte_s,
3654 gen_helper_gvec_frsqrte_d,
3655 };
3656 if (a->esz == 0) {
3657 return false;
3658 }
3659 if (sve_access_check(s)) {
3660 do_zz_fp(s, a, fns[a->esz - 1]);
3661 }
3662 return true;
3663}
3664
4d2e2a03
RH
3665/*
3666 *** SVE Floating Point Compare with Zero Group
3667 */
3668
3669static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3670 gen_helper_gvec_3_ptr *fn)
3671{
3672 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3673 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
3674
3675 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3676 vec_full_reg_offset(s, a->rn),
3677 pred_full_reg_offset(s, a->pg),
3678 status, vsz, vsz, 0, fn);
3679 tcg_temp_free_ptr(status);
3680}
3681
/* Expand the FP compare-with-zero group; byte elements are invalid. */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
{                                                         \
    static gen_helper_gvec_3_ptr * const fns[3] = {       \
        gen_helper_sve_##name##_h,                        \
        gen_helper_sve_##name##_s,                        \
        gen_helper_sve_##name##_d,                        \
    };                                                    \
    if (a->esz == 0) {                                    \
        return false;                                     \
    }                                                     \
    if (sve_access_check(s)) {                            \
        do_ppz_fp(s, a, fns[a->esz - 1]);                 \
    }                                                     \
    return true;                                          \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3707
67fcd9ad
RH
3708/*
3709 *** SVE floating-point trig multiply-add coefficient
3710 */
3711
3a7be554 3712static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
3713{
3714 static gen_helper_gvec_3_ptr * const fns[3] = {
3715 gen_helper_sve_ftmad_h,
3716 gen_helper_sve_ftmad_s,
3717 gen_helper_sve_ftmad_d,
3718 };
3719
3720 if (a->esz == 0) {
3721 return false;
3722 }
3723 if (sve_access_check(s)) {
3724 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3725 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
3726 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3727 vec_full_reg_offset(s, a->rn),
3728 vec_full_reg_offset(s, a->rm),
3729 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3730 tcg_temp_free_ptr(status);
3731 }
3732 return true;
3733}
3734
7f9ddf64
RH
3735/*
3736 *** SVE Floating Point Accumulating Reduction Group
3737 */
3738
/* FADDA: strictly-ordered FP add reduction across the active elements
 * of Zm, starting from element 0 of Zn, result written to scalar Vd. */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {          /* no byte-sized FP elements */
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* The initial accumulator is element 0 of Zn. */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    /* The helper both reads and writes t_val (the accumulator). */
    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
3778
29b80469
RH
3779/*
3780 *** SVE Floating Point Arithmetic - Unpredicated Group
3781 */
3782
3783static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3784 gen_helper_gvec_3_ptr *fn)
3785{
3786 if (fn == NULL) {
3787 return false;
3788 }
3789 if (sve_access_check(s)) {
3790 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3791 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
3792 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3793 vec_full_reg_offset(s, a->rn),
3794 vec_full_reg_offset(s, a->rm),
3795 status, vsz, vsz, 0, fn);
3796 tcg_temp_free_ptr(status);
3797 }
3798 return true;
3799}
3800
3801
/* Expand the unpredicated FP binary ops; the NULL entry makes
 * do_zzz_fp reject byte element size. */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
{                                                         \
    static gen_helper_gvec_3_ptr * const fns[4] = {       \
        NULL, gen_helper_gvec_##name##_h,                 \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
    };                                                    \
    return do_zzz_fp(s, a, fns[a->esz]);                  \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
3820
ec3b87c2
RH
3821/*
3822 *** SVE Floating Point Arithmetic - Predicated Group
3823 */
3824
3825static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3826 gen_helper_gvec_4_ptr *fn)
3827{
3828 if (fn == NULL) {
3829 return false;
3830 }
3831 if (sve_access_check(s)) {
3832 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3833 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
3834 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3835 vec_full_reg_offset(s, a->rn),
3836 vec_full_reg_offset(s, a->rm),
3837 pred_full_reg_offset(s, a->pg),
3838 status, vsz, vsz, 0, fn);
3839 tcg_temp_free_ptr(status);
3840 }
3841 return true;
3842}
3843
/* Expand the predicated FP binary ops; the NULL entry makes
 * do_zpzz_fp reject byte element size. */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{                                                          \
    static gen_helper_gvec_4_ptr * const fns[4] = {        \
        NULL, gen_helper_sve_##name##_h,                   \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    };                                                     \
    return do_zpzz_fp(s, a, fns[a->esz]);                  \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
8092c6a3 3867
cc48affe
RH
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/* Expand Zd = Zn <op> scalar, predicated by Pg; is_fp16 selects the
 * half-precision float_status. */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
3894
/* Expand an FP operation with an implied immediate operand by
 * materializing the constant and reusing the two-scalar expansion. */
static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
                      gen_helper_sve_fp2scalar *fn)
{
    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                 tcg_constant_i64(imm), fn);
}
3901
/* Expand FP arithmetic with one of two implied immediates; a->imm
 * selects const0 vs const1 (e.g. FADD allows only #0.5 or #1.0). */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)  \
{                                                                  \
    static gen_helper_sve_fp2scalar * const fns[3] = {             \
        gen_helper_sve_##name##_h,                                 \
        gen_helper_sve_##name##_s,                                 \
        gen_helper_sve_##name##_d                                  \
    };                                                             \
    static uint64_t const val[3][2] = {                            \
        { float16_##const0, float16_##const1 },                    \
        { float32_##const0, float32_##const1 },                    \
        { float64_##const0, float64_##const1 },                    \
    };                                                             \
    if (a->esz == 0) {                                             \
        return false;                                              \
    }                                                              \
    if (sve_access_check(s)) {                                     \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
    }                                                              \
    return true;                                                   \
}

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
3934
abfdefd5
RH
3935static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3936 gen_helper_gvec_4_ptr *fn)
3937{
3938 if (fn == NULL) {
3939 return false;
3940 }
3941 if (sve_access_check(s)) {
3942 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3943 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
3944 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3945 vec_full_reg_offset(s, a->rn),
3946 vec_full_reg_offset(s, a->rm),
3947 pred_full_reg_offset(s, a->pg),
3948 status, vsz, vsz, 0, fn);
3949 tcg_temp_free_ptr(status);
3950 }
3951 return true;
3952}
3953
/* Expand the predicated FP compares; the NULL entry makes
 * do_fp_cmp reject byte element size. */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)  \
{                                                                  \
    static gen_helper_gvec_4_ptr * const fns[4] = {                \
        NULL, gen_helper_sve_##name##_h,                           \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d       \
    };                                                             \
    return do_fp_cmp(s, a, fns[a->esz]);                           \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
3973
3a7be554 3974static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
3975{
3976 static gen_helper_gvec_4_ptr * const fns[3] = {
3977 gen_helper_sve_fcadd_h,
3978 gen_helper_sve_fcadd_s,
3979 gen_helper_sve_fcadd_d
3980 };
3981
3982 if (a->esz == 0) {
3983 return false;
3984 }
3985 if (sve_access_check(s)) {
3986 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3987 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
3988 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3989 vec_full_reg_offset(s, a->rn),
3990 vec_full_reg_offset(s, a->rm),
3991 pred_full_reg_offset(s, a->pg),
3992 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3993 tcg_temp_free_ptr(status);
3994 }
3995 return true;
3996}
3997
08975da9
RH
3998static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
3999 gen_helper_gvec_5_ptr *fn)
6ceabaad 4000{
08975da9 4001 if (a->esz == 0) {
6ceabaad
RH
4002 return false;
4003 }
08975da9
RH
4004 if (sve_access_check(s)) {
4005 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4006 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4007 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4008 vec_full_reg_offset(s, a->rn),
4009 vec_full_reg_offset(s, a->rm),
4010 vec_full_reg_offset(s, a->ra),
4011 pred_full_reg_offset(s, a->pg),
4012 status, vsz, vsz, 0, fn);
4013 tcg_temp_free_ptr(status);
6ceabaad 4014 }
6ceabaad
RH
4015 return true;
4016}
4017
/*
 * Expand one trans_* function per fused multiply-add insn, dispatching
 * on element size via do_fmla (fns[0] is NULL; do_fmla rejects esz == 0
 * before indexing matters).
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
{                                                           \
    static gen_helper_gvec_5_ptr * const fns[4] = {         \
        NULL, gen_helper_sve_##name##_h,                    \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    };                                                      \
    return do_fmla(s, a, fns[a->esz]);                      \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4034
3a7be554 4035static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 4036{
08975da9
RH
4037 static gen_helper_gvec_5_ptr * const fns[4] = {
4038 NULL,
05f48bab
RH
4039 gen_helper_sve_fcmla_zpzzz_h,
4040 gen_helper_sve_fcmla_zpzzz_s,
4041 gen_helper_sve_fcmla_zpzzz_d,
4042 };
4043
4044 if (a->esz == 0) {
4045 return false;
4046 }
4047 if (sve_access_check(s)) {
4048 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4049 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4050 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4051 vec_full_reg_offset(s, a->rn),
4052 vec_full_reg_offset(s, a->rm),
4053 vec_full_reg_offset(s, a->ra),
4054 pred_full_reg_offset(s, a->pg),
4055 status, vsz, vsz, a->rot, fns[a->esz]);
4056 tcg_temp_free_ptr(status);
05f48bab
RH
4057 }
4058 return true;
4059}
4060
/*
 * FCMLA (indexed): complex multiply-add with an indexed element.
 * Only H and S element sizes are allocated; index and rotation are
 * packed together into the simd data field (index * 4 + rot).
 */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_4_ptr * const fns[4] = {
        NULL,
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
        NULL,
    };

    /* The decoder guarantees rd == ra for this encoding. */
    tcg_debug_assert(a->rd == a->ra);

    return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
                              a->index * 4 + a->rot,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}
4076
8092c6a3
RH
4077/*
4078 *** SVE Floating Point Unary Operations Predicated Group
4079 */
4080
4081static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4082 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4083{
4084 if (sve_access_check(s)) {
4085 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4086 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
4087 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4088 vec_full_reg_offset(s, rn),
4089 pred_full_reg_offset(s, pg),
4090 status, vsz, vsz, 0, fn);
4091 tcg_temp_free_ptr(status);
4092 }
4093 return true;
4094}
4095
/*
 * FCVT / BFCVT: precision conversions between FP formats.  The suffix
 * names source and destination sizes (e.g. _sh: single to half).  All
 * use the normal (non-FP16) status word.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
{
    /* BFCVT requires the SVE bfloat16 feature. */
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}
4133
/*
 * FCVTZS / FCVTZU: FP to signed/unsigned integer, round toward zero.
 * The suffix names source FP size and destination integer size.
 * Conversions with a half-precision source use the FP16 status word.
 */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
4203
/* Helper table shared by FRINTI and the explicit-rounding FRINT* insns. */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round to integral using the current FPCR rounding mode. */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        /* Byte elements are unallocated. */
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}
4218
/* FRINTX: round to integral, current mode, signalling inexact. */
static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frintx_h,
        gen_helper_sve_frintx_s,
        gen_helper_sve_frintx_d
    };
    if (a->esz == 0) {
        /* Byte elements are unallocated. */
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
4231
/*
 * Expand an FRINT* insn that uses an explicit rounding mode.  The mode
 * is swapped into the FP status before the helper call and swapped back
 * afterwards; gen_helper_set_rmode returns the previous mode in its
 * destination, so calling it twice with the same temp restores state.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* tmode is written back by the helper; it cannot be a constant. */
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);

        /* Restore the original rounding mode saved in tmode. */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4253
/*
 * FRINTN/P/M/Z/A: round to integral with an explicit rounding mode
 * (to-nearest-even, +inf, -inf, zero, ties-away respectively).
 * Byte elements are unallocated in every case.
 */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
}

static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
}

static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
}

static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
}

static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
}
4293
/* FRECPX: reciprocal exponent estimate.  Byte elements unallocated. */
static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frecpx_h,
        gen_helper_sve_frecpx_s,
        gen_helper_sve_frecpx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}

/* FSQRT: predicated square root.  Byte elements unallocated. */
static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_fsqrt_h,
        gen_helper_sve_fsqrt_s,
        gen_helper_sve_fsqrt_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
4319
/*
 * SCVTF / UCVTF: signed/unsigned integer to FP conversion.  The suffix
 * names source integer size and destination FP size.  Conversions that
 * produce half-precision results use the FP16 status word.
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
4389
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte parts, counting a trailing sub-8-byte part as one. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    /* Check the whole [Rn + imm, +len) range for MTE in one go. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small transfer: fully unrolled 8-byte loads. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Larger transfer: emit a TCG-level loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        /* Store to cpu_env at vofs + i, advancing i by 8. */
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) maps 2/4/8 to MO_16/MO_32/MO_64. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte load followed by a 2-byte load. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
4484
/* Similarly for stores: write LEN bytes at VOFS to address Rn + IMM. */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte parts, counting a trailing sub-8-byte part as one. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    /* Check the whole [Rn + imm, +len) range for MTE in one go. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities. There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small transfer: fully unrolled 8-byte stores. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Larger transfer: emit a TCG-level loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        /* Load from cpu_env at vofs + i, advancing i by 8. */
        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) maps 2/4/8 to MO_16/MO_32/MO_64. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte store followed by a 2-byte store. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
4569
3a7be554 4570static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4571{
4572 if (sve_access_check(s)) {
4573 int size = vec_full_reg_size(s);
4574 int off = vec_full_reg_offset(s, a->rd);
4575 do_ldr(s, off, size, a->rn, a->imm * size);
4576 }
4577 return true;
4578}
4579
3a7be554 4580static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4581{
4582 if (sve_access_check(s)) {
4583 int size = pred_full_reg_size(s);
4584 int off = pred_full_reg_offset(s, a->rd);
4585 do_ldr(s, off, size, a->rn, a->imm * size);
4586 }
4587 return true;
4588}
c4e7c493 4589
3a7be554 4590static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4591{
4592 if (sve_access_check(s)) {
4593 int size = vec_full_reg_size(s);
4594 int off = vec_full_reg_offset(s, a->rd);
4595 do_str(s, off, size, a->rn, a->imm * size);
4596 }
4597 return true;
4598}
4599
3a7be554 4600static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4601{
4602 if (sve_access_check(s)) {
4603 int size = pred_full_reg_size(s);
4604 int off = pred_full_reg_offset(s, a->rd);
4605 do_str(s, off, size, a->rn, a->imm * size);
4606 }
4607 return true;
4608}
4609
/*
 *** SVE Memory - Contiguous Load Group
 */

/* The memory mode of the dtype: access size plus sign extension,
 * indexed by the 4-bit dtype field of the load encodings.
 */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

/* Extract just the memory access size from the dtype. */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype (log2 bytes), same 4-bit index. */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4631
/*
 * Emit a predicated contiguous memory operation via an out-of-line
 * helper.  mte_n is the number of registers transferred (for sizing
 * the MTE check); is_write distinguishes stores for the MTE descriptor.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        /* Pack the MTE check parameters above SVE_MTEDESC_SHIFT. */
        int msz = dtype_msz(dtype);

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        /* Without MTE, strip the tag bits inline instead. */
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_pg);
}
4666
/*
 * Helpers for contiguous LD1-LD4, indexed by [mte][be][dtype][nreg].
 * NULL entries are dtype/nreg combinations with no instruction encoding
 * (multi-register forms exist only for same-size load dtypes).
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
4785
/*
 * Emit a contiguous predicated load of nreg+1 registers, selecting the
 * helper from ldr_fns by MTE state, endianness, dtype and register count.
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];

    /*
     * While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
}
4799
3a7be554 4800static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
4801{
4802 if (a->rm == 31) {
4803 return false;
4804 }
4805 if (sve_access_check(s)) {
4806 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4807 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4808 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4809 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4810 }
4811 return true;
4812}
4813
3a7be554 4814static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
4815{
4816 if (sve_access_check(s)) {
4817 int vsz = vec_full_reg_size(s);
4818 int elements = vsz >> dtype_esz[a->dtype];
4819 TCGv_i64 addr = new_tmp_a64(s);
4820
4821 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4822 (a->imm * elements * (a->nreg + 1))
4823 << dtype_msz(a->dtype));
4824 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4825 }
4826 return true;
4827}
e2654d75 4828
/*
 * LDFF1 (scalar plus scalar): first-fault contiguous load.
 * Helper table indexed by [mte][be][dtype]; first-fault loads exist
 * only in single-register form.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        /* address = Rn + (Rm << msz) */
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4926
3a7be554 4927static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 4928{
aa13f7c3
RH
4929 static gen_helper_gvec_mem * const fns[2][2][16] = {
4930 { /* mte inactive, little-endian */
4931 { gen_helper_sve_ldnf1bb_r,
4932 gen_helper_sve_ldnf1bhu_r,
4933 gen_helper_sve_ldnf1bsu_r,
4934 gen_helper_sve_ldnf1bdu_r,
4935
4936 gen_helper_sve_ldnf1sds_le_r,
4937 gen_helper_sve_ldnf1hh_le_r,
4938 gen_helper_sve_ldnf1hsu_le_r,
4939 gen_helper_sve_ldnf1hdu_le_r,
4940
4941 gen_helper_sve_ldnf1hds_le_r,
4942 gen_helper_sve_ldnf1hss_le_r,
4943 gen_helper_sve_ldnf1ss_le_r,
4944 gen_helper_sve_ldnf1sdu_le_r,
4945
4946 gen_helper_sve_ldnf1bds_r,
4947 gen_helper_sve_ldnf1bss_r,
4948 gen_helper_sve_ldnf1bhs_r,
4949 gen_helper_sve_ldnf1dd_le_r },
4950
4951 /* mte inactive, big-endian */
4952 { gen_helper_sve_ldnf1bb_r,
4953 gen_helper_sve_ldnf1bhu_r,
4954 gen_helper_sve_ldnf1bsu_r,
4955 gen_helper_sve_ldnf1bdu_r,
4956
4957 gen_helper_sve_ldnf1sds_be_r,
4958 gen_helper_sve_ldnf1hh_be_r,
4959 gen_helper_sve_ldnf1hsu_be_r,
4960 gen_helper_sve_ldnf1hdu_be_r,
4961
4962 gen_helper_sve_ldnf1hds_be_r,
4963 gen_helper_sve_ldnf1hss_be_r,
4964 gen_helper_sve_ldnf1ss_be_r,
4965 gen_helper_sve_ldnf1sdu_be_r,
4966
4967 gen_helper_sve_ldnf1bds_r,
4968 gen_helper_sve_ldnf1bss_r,
4969 gen_helper_sve_ldnf1bhs_r,
4970 gen_helper_sve_ldnf1dd_be_r } },
4971
4972 { /* mte inactive, little-endian */
4973 { gen_helper_sve_ldnf1bb_r_mte,
4974 gen_helper_sve_ldnf1bhu_r_mte,
4975 gen_helper_sve_ldnf1bsu_r_mte,
4976 gen_helper_sve_ldnf1bdu_r_mte,
4977
4978 gen_helper_sve_ldnf1sds_le_r_mte,
4979 gen_helper_sve_ldnf1hh_le_r_mte,
4980 gen_helper_sve_ldnf1hsu_le_r_mte,
4981 gen_helper_sve_ldnf1hdu_le_r_mte,
4982
4983 gen_helper_sve_ldnf1hds_le_r_mte,
4984 gen_helper_sve_ldnf1hss_le_r_mte,
4985 gen_helper_sve_ldnf1ss_le_r_mte,
4986 gen_helper_sve_ldnf1sdu_le_r_mte,
4987
4988 gen_helper_sve_ldnf1bds_r_mte,
4989 gen_helper_sve_ldnf1bss_r_mte,
4990 gen_helper_sve_ldnf1bhs_r_mte,
4991 gen_helper_sve_ldnf1dd_le_r_mte },
4992
4993 /* mte inactive, big-endian */
4994 { gen_helper_sve_ldnf1bb_r_mte,
4995 gen_helper_sve_ldnf1bhu_r_mte,
4996 gen_helper_sve_ldnf1bsu_r_mte,
4997 gen_helper_sve_ldnf1bdu_r_mte,
4998
4999 gen_helper_sve_ldnf1sds_be_r_mte,
5000 gen_helper_sve_ldnf1hh_be_r_mte,
5001 gen_helper_sve_ldnf1hsu_be_r_mte,
5002 gen_helper_sve_ldnf1hdu_be_r_mte,
5003
5004 gen_helper_sve_ldnf1hds_be_r_mte,
5005 gen_helper_sve_ldnf1hss_be_r_mte,
5006 gen_helper_sve_ldnf1ss_be_r_mte,
5007 gen_helper_sve_ldnf1sdu_be_r_mte,
5008
5009 gen_helper_sve_ldnf1bds_r_mte,
5010 gen_helper_sve_ldnf1bss_r_mte,
5011 gen_helper_sve_ldnf1bhs_r_mte,
5012 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
5013 };
5014
5015 if (sve_access_check(s)) {
5016 int vsz = vec_full_reg_size(s);
5017 int elements = vsz >> dtype_esz[a->dtype];
5018 int off = (a->imm * elements) << dtype_msz(a->dtype);
5019 TCGv_i64 addr = new_tmp_a64(s);
5020
5021 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
5022 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5023 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
5024 }
5025 return true;
5026}
1a039c7e 5027
c182c6db 5028static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
05abe304 5029{
05abe304
RH
5030 unsigned vsz = vec_full_reg_size(s);
5031 TCGv_ptr t_pg;
7924d239 5032 int poff;
05abe304
RH
5033
5034 /* Load the first quadword using the normal predicated load helpers. */
2a99ab2b
RH
5035 poff = pred_full_reg_offset(s, pg);
5036 if (vsz > 16) {
5037 /*
5038 * Zero-extend the first 16 bits of the predicate into a temporary.
5039 * This avoids triggering an assert making sure we don't have bits
5040 * set within a predicate beyond VQ, but we have lowered VQ to 1
5041 * for this load operation.
5042 */
5043 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5044#if HOST_BIG_ENDIAN
2a99ab2b
RH
5045 poff += 6;
5046#endif
5047 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5048
5049 poff = offsetof(CPUARMState, vfp.preg_tmp);
5050 tcg_gen_st_i64(tmp, cpu_env, poff);
5051 tcg_temp_free_i64(tmp);
5052 }
5053
05abe304 5054 t_pg = tcg_temp_new_ptr();
2a99ab2b 5055 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 5056
c182c6db
RH
5057 gen_helper_gvec_mem *fn
5058 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
7924d239 5059 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
05abe304
RH
5060
5061 tcg_temp_free_ptr(t_pg);
05abe304
RH
5062
5063 /* Replicate that first quadword. */
5064 if (vsz > 16) {
7924d239
RH
5065 int doff = vec_full_reg_offset(s, zt);
5066 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
05abe304
RH
5067 }
5068}
5069
3a7be554 5070static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5071{
5072 if (a->rm == 31) {
5073 return false;
5074 }
5075 if (sve_access_check(s)) {
5076 int msz = dtype_msz(a->dtype);
5077 TCGv_i64 addr = new_tmp_a64(s);
5078 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5079 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5080 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5081 }
5082 return true;
5083}
5084
3a7be554 5085static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5086{
5087 if (sve_access_check(s)) {
5088 TCGv_i64 addr = new_tmp_a64(s);
5089 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5090 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5091 }
5092 return true;
5093}
5094
12c563f6
RH
5095static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
5096{
5097 unsigned vsz = vec_full_reg_size(s);
5098 unsigned vsz_r32;
5099 TCGv_ptr t_pg;
5100 int poff, doff;
5101
5102 if (vsz < 32) {
5103 /*
5104 * Note that this UNDEFINED check comes after CheckSVEEnabled()
5105 * in the ARM pseudocode, which is the sve_access_check() done
5106 * in our caller. We should not now return false from the caller.
5107 */
5108 unallocated_encoding(s);
5109 return;
5110 }
5111
5112 /* Load the first octaword using the normal predicated load helpers. */
5113
5114 poff = pred_full_reg_offset(s, pg);
5115 if (vsz > 32) {
5116 /*
5117 * Zero-extend the first 32 bits of the predicate into a temporary.
5118 * This avoids triggering an assert making sure we don't have bits
5119 * set within a predicate beyond VQ, but we have lowered VQ to 2
5120 * for this load operation.
5121 */
5122 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5123#if HOST_BIG_ENDIAN
12c563f6
RH
5124 poff += 4;
5125#endif
5126 tcg_gen_ld32u_i64(tmp, cpu_env, poff);
5127
5128 poff = offsetof(CPUARMState, vfp.preg_tmp);
5129 tcg_gen_st_i64(tmp, cpu_env, poff);
5130 tcg_temp_free_i64(tmp);
5131 }
5132
5133 t_pg = tcg_temp_new_ptr();
5134 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
5135
5136 gen_helper_gvec_mem *fn
5137 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
5138 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
5139
5140 tcg_temp_free_ptr(t_pg);
5141
5142 /*
5143 * Replicate that first octaword.
5144 * The replication happens in units of 32; if the full vector size
5145 * is not a multiple of 32, the final bits are zeroed.
5146 */
5147 doff = vec_full_reg_offset(s, zt);
5148 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
5149 if (vsz >= 64) {
5150 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
5151 }
5152 vsz -= vsz_r32;
5153 if (vsz) {
5154 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
5155 }
5156}
5157
5158static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5159{
5160 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5161 return false;
5162 }
5163 if (a->rm == 31) {
5164 return false;
5165 }
5166 if (sve_access_check(s)) {
5167 TCGv_i64 addr = new_tmp_a64(s);
5168 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5169 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5170 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5171 }
5172 return true;
5173}
5174
5175static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5176{
5177 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5178 return false;
5179 }
5180 if (sve_access_check(s)) {
5181 TCGv_i64 addr = new_tmp_a64(s);
5182 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5183 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5184 }
5185 return true;
5186}
5187
68459864 5188/* Load and broadcast element. */
3a7be554 5189static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 5190{
68459864
RH
5191 unsigned vsz = vec_full_reg_size(s);
5192 unsigned psz = pred_full_reg_size(s);
5193 unsigned esz = dtype_esz[a->dtype];
d0e372b0 5194 unsigned msz = dtype_msz(a->dtype);
c0ed9166 5195 TCGLabel *over;
4ac430e1 5196 TCGv_i64 temp, clean_addr;
68459864 5197
c0ed9166
RH
5198 if (!sve_access_check(s)) {
5199 return true;
5200 }
5201
5202 over = gen_new_label();
5203
68459864
RH
5204 /* If the guarding predicate has no bits set, no load occurs. */
5205 if (psz <= 8) {
5206 /* Reduce the pred_esz_masks value simply to reduce the
5207 * size of the code generated here.
5208 */
5209 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5210 temp = tcg_temp_new_i64();
5211 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5212 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5213 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5214 tcg_temp_free_i64(temp);
5215 } else {
5216 TCGv_i32 t32 = tcg_temp_new_i32();
5217 find_last_active(s, t32, esz, a->pg);
5218 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5219 tcg_temp_free_i32(t32);
5220 }
5221
5222 /* Load the data. */
5223 temp = tcg_temp_new_i64();
d0e372b0 5224 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5225 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5226
5227 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
0ca0f872 5228 finalize_memop(s, dtype_mop[a->dtype]));
68459864
RH
5229
5230 /* Broadcast to *all* elements. */
5231 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5232 vsz, vsz, temp);
5233 tcg_temp_free_i64(temp);
5234
5235 /* Zero the inactive elements. */
5236 gen_set_label(over);
60245996 5237 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5238}
5239
1a039c7e
RH
5240static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5241 int msz, int esz, int nreg)
5242{
71b9f394
RH
5243 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5244 { { { gen_helper_sve_st1bb_r,
5245 gen_helper_sve_st1bh_r,
5246 gen_helper_sve_st1bs_r,
5247 gen_helper_sve_st1bd_r },
5248 { NULL,
5249 gen_helper_sve_st1hh_le_r,
5250 gen_helper_sve_st1hs_le_r,
5251 gen_helper_sve_st1hd_le_r },
5252 { NULL, NULL,
5253 gen_helper_sve_st1ss_le_r,
5254 gen_helper_sve_st1sd_le_r },
5255 { NULL, NULL, NULL,
5256 gen_helper_sve_st1dd_le_r } },
5257 { { gen_helper_sve_st1bb_r,
5258 gen_helper_sve_st1bh_r,
5259 gen_helper_sve_st1bs_r,
5260 gen_helper_sve_st1bd_r },
5261 { NULL,
5262 gen_helper_sve_st1hh_be_r,
5263 gen_helper_sve_st1hs_be_r,
5264 gen_helper_sve_st1hd_be_r },
5265 { NULL, NULL,
5266 gen_helper_sve_st1ss_be_r,
5267 gen_helper_sve_st1sd_be_r },
5268 { NULL, NULL, NULL,
5269 gen_helper_sve_st1dd_be_r } } },
5270
5271 { { { gen_helper_sve_st1bb_r_mte,
5272 gen_helper_sve_st1bh_r_mte,
5273 gen_helper_sve_st1bs_r_mte,
5274 gen_helper_sve_st1bd_r_mte },
5275 { NULL,
5276 gen_helper_sve_st1hh_le_r_mte,
5277 gen_helper_sve_st1hs_le_r_mte,
5278 gen_helper_sve_st1hd_le_r_mte },
5279 { NULL, NULL,
5280 gen_helper_sve_st1ss_le_r_mte,
5281 gen_helper_sve_st1sd_le_r_mte },
5282 { NULL, NULL, NULL,
5283 gen_helper_sve_st1dd_le_r_mte } },
5284 { { gen_helper_sve_st1bb_r_mte,
5285 gen_helper_sve_st1bh_r_mte,
5286 gen_helper_sve_st1bs_r_mte,
5287 gen_helper_sve_st1bd_r_mte },
5288 { NULL,
5289 gen_helper_sve_st1hh_be_r_mte,
5290 gen_helper_sve_st1hs_be_r_mte,
5291 gen_helper_sve_st1hd_be_r_mte },
5292 { NULL, NULL,
5293 gen_helper_sve_st1ss_be_r_mte,
5294 gen_helper_sve_st1sd_be_r_mte },
5295 { NULL, NULL, NULL,
5296 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5297 };
71b9f394
RH
5298 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5299 { { { gen_helper_sve_st2bb_r,
5300 gen_helper_sve_st2hh_le_r,
5301 gen_helper_sve_st2ss_le_r,
5302 gen_helper_sve_st2dd_le_r },
5303 { gen_helper_sve_st3bb_r,
5304 gen_helper_sve_st3hh_le_r,
5305 gen_helper_sve_st3ss_le_r,
5306 gen_helper_sve_st3dd_le_r },
5307 { gen_helper_sve_st4bb_r,
5308 gen_helper_sve_st4hh_le_r,
5309 gen_helper_sve_st4ss_le_r,
5310 gen_helper_sve_st4dd_le_r } },
5311 { { gen_helper_sve_st2bb_r,
5312 gen_helper_sve_st2hh_be_r,
5313 gen_helper_sve_st2ss_be_r,
5314 gen_helper_sve_st2dd_be_r },
5315 { gen_helper_sve_st3bb_r,
5316 gen_helper_sve_st3hh_be_r,
5317 gen_helper_sve_st3ss_be_r,
5318 gen_helper_sve_st3dd_be_r },
5319 { gen_helper_sve_st4bb_r,
5320 gen_helper_sve_st4hh_be_r,
5321 gen_helper_sve_st4ss_be_r,
5322 gen_helper_sve_st4dd_be_r } } },
5323 { { { gen_helper_sve_st2bb_r_mte,
5324 gen_helper_sve_st2hh_le_r_mte,
5325 gen_helper_sve_st2ss_le_r_mte,
5326 gen_helper_sve_st2dd_le_r_mte },
5327 { gen_helper_sve_st3bb_r_mte,
5328 gen_helper_sve_st3hh_le_r_mte,
5329 gen_helper_sve_st3ss_le_r_mte,
5330 gen_helper_sve_st3dd_le_r_mte },
5331 { gen_helper_sve_st4bb_r_mte,
5332 gen_helper_sve_st4hh_le_r_mte,
5333 gen_helper_sve_st4ss_le_r_mte,
5334 gen_helper_sve_st4dd_le_r_mte } },
5335 { { gen_helper_sve_st2bb_r_mte,
5336 gen_helper_sve_st2hh_be_r_mte,
5337 gen_helper_sve_st2ss_be_r_mte,
5338 gen_helper_sve_st2dd_be_r_mte },
5339 { gen_helper_sve_st3bb_r_mte,
5340 gen_helper_sve_st3hh_be_r_mte,
5341 gen_helper_sve_st3ss_be_r_mte,
5342 gen_helper_sve_st3dd_be_r_mte },
5343 { gen_helper_sve_st4bb_r_mte,
5344 gen_helper_sve_st4hh_be_r_mte,
5345 gen_helper_sve_st4ss_be_r_mte,
5346 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5347 };
5348 gen_helper_gvec_mem *fn;
28d57f2d 5349 int be = s->be_data == MO_BE;
1a039c7e
RH
5350
5351 if (nreg == 0) {
5352 /* ST1 */
71b9f394
RH
5353 fn = fn_single[s->mte_active[0]][be][msz][esz];
5354 nreg = 1;
1a039c7e
RH
5355 } else {
5356 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5357 assert(msz == esz);
71b9f394 5358 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5359 }
5360 assert(fn != NULL);
71b9f394 5361 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5362}
5363
3a7be554 5364static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5365{
5366 if (a->rm == 31 || a->msz > a->esz) {
5367 return false;
5368 }
5369 if (sve_access_check(s)) {
5370 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5371 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5372 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5373 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5374 }
5375 return true;
5376}
5377
3a7be554 5378static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5379{
5380 if (a->msz > a->esz) {
5381 return false;
5382 }
5383 if (sve_access_check(s)) {
5384 int vsz = vec_full_reg_size(s);
5385 int elements = vsz >> a->esz;
5386 TCGv_i64 addr = new_tmp_a64(s);
5387
5388 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5389 (a->imm * elements * (a->nreg + 1)) << a->msz);
5390 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5391 }
5392 return true;
5393}
f6dbf62a
RH
5394
5395/*
5396 *** SVE gather loads / scatter stores
5397 */
5398
500d0484 5399static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5400 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5401 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5402{
5403 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5404 TCGv_ptr t_zm = tcg_temp_new_ptr();
5405 TCGv_ptr t_pg = tcg_temp_new_ptr();
5406 TCGv_ptr t_zt = tcg_temp_new_ptr();
d28d12f0 5407 int desc = 0;
500d0484 5408
d28d12f0
RH
5409 if (s->mte_active[0]) {
5410 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5411 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5412 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5413 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5414 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
5415 desc <<= SVE_MTEDESC_SHIFT;
5416 }
cdecb3fc 5417 desc = simd_desc(vsz, vsz, desc | scale);
f6dbf62a
RH
5418
5419 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5420 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5421 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
c6a59b55 5422 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
f6dbf62a
RH
5423
5424 tcg_temp_free_ptr(t_zt);
5425 tcg_temp_free_ptr(t_zm);
5426 tcg_temp_free_ptr(t_pg);
f6dbf62a
RH
5427}
5428
d28d12f0
RH
5429/* Indexed by [mte][be][ff][xs][u][msz]. */
5430static gen_helper_gvec_mem_scatter * const
5431gather_load_fn32[2][2][2][2][2][3] = {
5432 { /* MTE Inactive */
5433 { /* Little-endian */
5434 { { { gen_helper_sve_ldbss_zsu,
5435 gen_helper_sve_ldhss_le_zsu,
5436 NULL, },
5437 { gen_helper_sve_ldbsu_zsu,
5438 gen_helper_sve_ldhsu_le_zsu,
5439 gen_helper_sve_ldss_le_zsu, } },
5440 { { gen_helper_sve_ldbss_zss,
5441 gen_helper_sve_ldhss_le_zss,
5442 NULL, },
5443 { gen_helper_sve_ldbsu_zss,
5444 gen_helper_sve_ldhsu_le_zss,
5445 gen_helper_sve_ldss_le_zss, } } },
5446
5447 /* First-fault */
5448 { { { gen_helper_sve_ldffbss_zsu,
5449 gen_helper_sve_ldffhss_le_zsu,
5450 NULL, },
5451 { gen_helper_sve_ldffbsu_zsu,
5452 gen_helper_sve_ldffhsu_le_zsu,
5453 gen_helper_sve_ldffss_le_zsu, } },
5454 { { gen_helper_sve_ldffbss_zss,
5455 gen_helper_sve_ldffhss_le_zss,
5456 NULL, },
5457 { gen_helper_sve_ldffbsu_zss,
5458 gen_helper_sve_ldffhsu_le_zss,
5459 gen_helper_sve_ldffss_le_zss, } } } },
5460
5461 { /* Big-endian */
5462 { { { gen_helper_sve_ldbss_zsu,
5463 gen_helper_sve_ldhss_be_zsu,
5464 NULL, },
5465 { gen_helper_sve_ldbsu_zsu,
5466 gen_helper_sve_ldhsu_be_zsu,
5467 gen_helper_sve_ldss_be_zsu, } },
5468 { { gen_helper_sve_ldbss_zss,
5469 gen_helper_sve_ldhss_be_zss,
5470 NULL, },
5471 { gen_helper_sve_ldbsu_zss,
5472 gen_helper_sve_ldhsu_be_zss,
5473 gen_helper_sve_ldss_be_zss, } } },
5474
5475 /* First-fault */
5476 { { { gen_helper_sve_ldffbss_zsu,
5477 gen_helper_sve_ldffhss_be_zsu,
5478 NULL, },
5479 { gen_helper_sve_ldffbsu_zsu,
5480 gen_helper_sve_ldffhsu_be_zsu,
5481 gen_helper_sve_ldffss_be_zsu, } },
5482 { { gen_helper_sve_ldffbss_zss,
5483 gen_helper_sve_ldffhss_be_zss,
5484 NULL, },
5485 { gen_helper_sve_ldffbsu_zss,
5486 gen_helper_sve_ldffhsu_be_zss,
5487 gen_helper_sve_ldffss_be_zss, } } } } },
5488 { /* MTE Active */
5489 { /* Little-endian */
5490 { { { gen_helper_sve_ldbss_zsu_mte,
5491 gen_helper_sve_ldhss_le_zsu_mte,
5492 NULL, },
5493 { gen_helper_sve_ldbsu_zsu_mte,
5494 gen_helper_sve_ldhsu_le_zsu_mte,
5495 gen_helper_sve_ldss_le_zsu_mte, } },
5496 { { gen_helper_sve_ldbss_zss_mte,
5497 gen_helper_sve_ldhss_le_zss_mte,
5498 NULL, },
5499 { gen_helper_sve_ldbsu_zss_mte,
5500 gen_helper_sve_ldhsu_le_zss_mte,
5501 gen_helper_sve_ldss_le_zss_mte, } } },
5502
5503 /* First-fault */
5504 { { { gen_helper_sve_ldffbss_zsu_mte,
5505 gen_helper_sve_ldffhss_le_zsu_mte,
5506 NULL, },
5507 { gen_helper_sve_ldffbsu_zsu_mte,
5508 gen_helper_sve_ldffhsu_le_zsu_mte,
5509 gen_helper_sve_ldffss_le_zsu_mte, } },
5510 { { gen_helper_sve_ldffbss_zss_mte,
5511 gen_helper_sve_ldffhss_le_zss_mte,
5512 NULL, },
5513 { gen_helper_sve_ldffbsu_zss_mte,
5514 gen_helper_sve_ldffhsu_le_zss_mte,
5515 gen_helper_sve_ldffss_le_zss_mte, } } } },
5516
5517 { /* Big-endian */
5518 { { { gen_helper_sve_ldbss_zsu_mte,
5519 gen_helper_sve_ldhss_be_zsu_mte,
5520 NULL, },
5521 { gen_helper_sve_ldbsu_zsu_mte,
5522 gen_helper_sve_ldhsu_be_zsu_mte,
5523 gen_helper_sve_ldss_be_zsu_mte, } },
5524 { { gen_helper_sve_ldbss_zss_mte,
5525 gen_helper_sve_ldhss_be_zss_mte,
5526 NULL, },
5527 { gen_helper_sve_ldbsu_zss_mte,
5528 gen_helper_sve_ldhsu_be_zss_mte,
5529 gen_helper_sve_ldss_be_zss_mte, } } },
5530
5531 /* First-fault */
5532 { { { gen_helper_sve_ldffbss_zsu_mte,
5533 gen_helper_sve_ldffhss_be_zsu_mte,
5534 NULL, },
5535 { gen_helper_sve_ldffbsu_zsu_mte,
5536 gen_helper_sve_ldffhsu_be_zsu_mte,
5537 gen_helper_sve_ldffss_be_zsu_mte, } },
5538 { { gen_helper_sve_ldffbss_zss_mte,
5539 gen_helper_sve_ldffhss_be_zss_mte,
5540 NULL, },
5541 { gen_helper_sve_ldffbsu_zss_mte,
5542 gen_helper_sve_ldffhsu_be_zss_mte,
5543 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5544};
5545
5546/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5547static gen_helper_gvec_mem_scatter * const
5548gather_load_fn64[2][2][2][3][2][4] = {
5549 { /* MTE Inactive */
5550 { /* Little-endian */
5551 { { { gen_helper_sve_ldbds_zsu,
5552 gen_helper_sve_ldhds_le_zsu,
5553 gen_helper_sve_ldsds_le_zsu,
5554 NULL, },
5555 { gen_helper_sve_ldbdu_zsu,
5556 gen_helper_sve_ldhdu_le_zsu,
5557 gen_helper_sve_ldsdu_le_zsu,
5558 gen_helper_sve_lddd_le_zsu, } },
5559 { { gen_helper_sve_ldbds_zss,
5560 gen_helper_sve_ldhds_le_zss,
5561 gen_helper_sve_ldsds_le_zss,
5562 NULL, },
5563 { gen_helper_sve_ldbdu_zss,
5564 gen_helper_sve_ldhdu_le_zss,
5565 gen_helper_sve_ldsdu_le_zss,
5566 gen_helper_sve_lddd_le_zss, } },
5567 { { gen_helper_sve_ldbds_zd,
5568 gen_helper_sve_ldhds_le_zd,
5569 gen_helper_sve_ldsds_le_zd,
5570 NULL, },
5571 { gen_helper_sve_ldbdu_zd,
5572 gen_helper_sve_ldhdu_le_zd,
5573 gen_helper_sve_ldsdu_le_zd,
5574 gen_helper_sve_lddd_le_zd, } } },
5575
5576 /* First-fault */
5577 { { { gen_helper_sve_ldffbds_zsu,
5578 gen_helper_sve_ldffhds_le_zsu,
5579 gen_helper_sve_ldffsds_le_zsu,
5580 NULL, },
5581 { gen_helper_sve_ldffbdu_zsu,
5582 gen_helper_sve_ldffhdu_le_zsu,
5583 gen_helper_sve_ldffsdu_le_zsu,
5584 gen_helper_sve_ldffdd_le_zsu, } },
5585 { { gen_helper_sve_ldffbds_zss,
5586 gen_helper_sve_ldffhds_le_zss,
5587 gen_helper_sve_ldffsds_le_zss,
5588 NULL, },
5589 { gen_helper_sve_ldffbdu_zss,
5590 gen_helper_sve_ldffhdu_le_zss,
5591 gen_helper_sve_ldffsdu_le_zss,
5592 gen_helper_sve_ldffdd_le_zss, } },
5593 { { gen_helper_sve_ldffbds_zd,
5594 gen_helper_sve_ldffhds_le_zd,
5595 gen_helper_sve_ldffsds_le_zd,
5596 NULL, },
5597 { gen_helper_sve_ldffbdu_zd,
5598 gen_helper_sve_ldffhdu_le_zd,
5599 gen_helper_sve_ldffsdu_le_zd,
5600 gen_helper_sve_ldffdd_le_zd, } } } },
5601 { /* Big-endian */
5602 { { { gen_helper_sve_ldbds_zsu,
5603 gen_helper_sve_ldhds_be_zsu,
5604 gen_helper_sve_ldsds_be_zsu,
5605 NULL, },
5606 { gen_helper_sve_ldbdu_zsu,
5607 gen_helper_sve_ldhdu_be_zsu,
5608 gen_helper_sve_ldsdu_be_zsu,
5609 gen_helper_sve_lddd_be_zsu, } },
5610 { { gen_helper_sve_ldbds_zss,
5611 gen_helper_sve_ldhds_be_zss,
5612 gen_helper_sve_ldsds_be_zss,
5613 NULL, },
5614 { gen_helper_sve_ldbdu_zss,
5615 gen_helper_sve_ldhdu_be_zss,
5616 gen_helper_sve_ldsdu_be_zss,
5617 gen_helper_sve_lddd_be_zss, } },
5618 { { gen_helper_sve_ldbds_zd,
5619 gen_helper_sve_ldhds_be_zd,
5620 gen_helper_sve_ldsds_be_zd,
5621 NULL, },
5622 { gen_helper_sve_ldbdu_zd,
5623 gen_helper_sve_ldhdu_be_zd,
5624 gen_helper_sve_ldsdu_be_zd,
5625 gen_helper_sve_lddd_be_zd, } } },
5626
5627 /* First-fault */
5628 { { { gen_helper_sve_ldffbds_zsu,
5629 gen_helper_sve_ldffhds_be_zsu,
5630 gen_helper_sve_ldffsds_be_zsu,
5631 NULL, },
5632 { gen_helper_sve_ldffbdu_zsu,
5633 gen_helper_sve_ldffhdu_be_zsu,
5634 gen_helper_sve_ldffsdu_be_zsu,
5635 gen_helper_sve_ldffdd_be_zsu, } },
5636 { { gen_helper_sve_ldffbds_zss,
5637 gen_helper_sve_ldffhds_be_zss,
5638 gen_helper_sve_ldffsds_be_zss,
5639 NULL, },
5640 { gen_helper_sve_ldffbdu_zss,
5641 gen_helper_sve_ldffhdu_be_zss,
5642 gen_helper_sve_ldffsdu_be_zss,
5643 gen_helper_sve_ldffdd_be_zss, } },
5644 { { gen_helper_sve_ldffbds_zd,
5645 gen_helper_sve_ldffhds_be_zd,
5646 gen_helper_sve_ldffsds_be_zd,
5647 NULL, },
5648 { gen_helper_sve_ldffbdu_zd,
5649 gen_helper_sve_ldffhdu_be_zd,
5650 gen_helper_sve_ldffsdu_be_zd,
5651 gen_helper_sve_ldffdd_be_zd, } } } } },
5652 { /* MTE Active */
5653 { /* Little-endian */
5654 { { { gen_helper_sve_ldbds_zsu_mte,
5655 gen_helper_sve_ldhds_le_zsu_mte,
5656 gen_helper_sve_ldsds_le_zsu_mte,
5657 NULL, },
5658 { gen_helper_sve_ldbdu_zsu_mte,
5659 gen_helper_sve_ldhdu_le_zsu_mte,
5660 gen_helper_sve_ldsdu_le_zsu_mte,
5661 gen_helper_sve_lddd_le_zsu_mte, } },
5662 { { gen_helper_sve_ldbds_zss_mte,
5663 gen_helper_sve_ldhds_le_zss_mte,
5664 gen_helper_sve_ldsds_le_zss_mte,
5665 NULL, },
5666 { gen_helper_sve_ldbdu_zss_mte,
5667 gen_helper_sve_ldhdu_le_zss_mte,
5668 gen_helper_sve_ldsdu_le_zss_mte,
5669 gen_helper_sve_lddd_le_zss_mte, } },
5670 { { gen_helper_sve_ldbds_zd_mte,
5671 gen_helper_sve_ldhds_le_zd_mte,
5672 gen_helper_sve_ldsds_le_zd_mte,
5673 NULL, },
5674 { gen_helper_sve_ldbdu_zd_mte,
5675 gen_helper_sve_ldhdu_le_zd_mte,
5676 gen_helper_sve_ldsdu_le_zd_mte,
5677 gen_helper_sve_lddd_le_zd_mte, } } },
5678
5679 /* First-fault */
5680 { { { gen_helper_sve_ldffbds_zsu_mte,
5681 gen_helper_sve_ldffhds_le_zsu_mte,
5682 gen_helper_sve_ldffsds_le_zsu_mte,
5683 NULL, },
5684 { gen_helper_sve_ldffbdu_zsu_mte,
5685 gen_helper_sve_ldffhdu_le_zsu_mte,
5686 gen_helper_sve_ldffsdu_le_zsu_mte,
5687 gen_helper_sve_ldffdd_le_zsu_mte, } },
5688 { { gen_helper_sve_ldffbds_zss_mte,
5689 gen_helper_sve_ldffhds_le_zss_mte,
5690 gen_helper_sve_ldffsds_le_zss_mte,
5691 NULL, },
5692 { gen_helper_sve_ldffbdu_zss_mte,
5693 gen_helper_sve_ldffhdu_le_zss_mte,
5694 gen_helper_sve_ldffsdu_le_zss_mte,
5695 gen_helper_sve_ldffdd_le_zss_mte, } },
5696 { { gen_helper_sve_ldffbds_zd_mte,
5697 gen_helper_sve_ldffhds_le_zd_mte,
5698 gen_helper_sve_ldffsds_le_zd_mte,
5699 NULL, },
5700 { gen_helper_sve_ldffbdu_zd_mte,
5701 gen_helper_sve_ldffhdu_le_zd_mte,
5702 gen_helper_sve_ldffsdu_le_zd_mte,
5703 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5704 { /* Big-endian */
5705 { { { gen_helper_sve_ldbds_zsu_mte,
5706 gen_helper_sve_ldhds_be_zsu_mte,
5707 gen_helper_sve_ldsds_be_zsu_mte,
5708 NULL, },
5709 { gen_helper_sve_ldbdu_zsu_mte,
5710 gen_helper_sve_ldhdu_be_zsu_mte,
5711 gen_helper_sve_ldsdu_be_zsu_mte,
5712 gen_helper_sve_lddd_be_zsu_mte, } },
5713 { { gen_helper_sve_ldbds_zss_mte,
5714 gen_helper_sve_ldhds_be_zss_mte,
5715 gen_helper_sve_ldsds_be_zss_mte,
5716 NULL, },
5717 { gen_helper_sve_ldbdu_zss_mte,
5718 gen_helper_sve_ldhdu_be_zss_mte,
5719 gen_helper_sve_ldsdu_be_zss_mte,
5720 gen_helper_sve_lddd_be_zss_mte, } },
5721 { { gen_helper_sve_ldbds_zd_mte,
5722 gen_helper_sve_ldhds_be_zd_mte,
5723 gen_helper_sve_ldsds_be_zd_mte,
5724 NULL, },
5725 { gen_helper_sve_ldbdu_zd_mte,
5726 gen_helper_sve_ldhdu_be_zd_mte,
5727 gen_helper_sve_ldsdu_be_zd_mte,
5728 gen_helper_sve_lddd_be_zd_mte, } } },
5729
5730 /* First-fault */
5731 { { { gen_helper_sve_ldffbds_zsu_mte,
5732 gen_helper_sve_ldffhds_be_zsu_mte,
5733 gen_helper_sve_ldffsds_be_zsu_mte,
5734 NULL, },
5735 { gen_helper_sve_ldffbdu_zsu_mte,
5736 gen_helper_sve_ldffhdu_be_zsu_mte,
5737 gen_helper_sve_ldffsdu_be_zsu_mte,
5738 gen_helper_sve_ldffdd_be_zsu_mte, } },
5739 { { gen_helper_sve_ldffbds_zss_mte,
5740 gen_helper_sve_ldffhds_be_zss_mte,
5741 gen_helper_sve_ldffsds_be_zss_mte,
5742 NULL, },
5743 { gen_helper_sve_ldffbdu_zss_mte,
5744 gen_helper_sve_ldffhdu_be_zss_mte,
5745 gen_helper_sve_ldffsdu_be_zss_mte,
5746 gen_helper_sve_ldffdd_be_zss_mte, } },
5747 { { gen_helper_sve_ldffbds_zd_mte,
5748 gen_helper_sve_ldffhds_be_zd_mte,
5749 gen_helper_sve_ldffsds_be_zd_mte,
5750 NULL, },
5751 { gen_helper_sve_ldffbdu_zd_mte,
5752 gen_helper_sve_ldffhdu_be_zd_mte,
5753 gen_helper_sve_ldffsdu_be_zd_mte,
5754 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
5755};
5756
3a7be554 5757static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5758{
5759 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5760 bool be = s->be_data == MO_BE;
5761 bool mte = s->mte_active[0];
673e9fa6
RH
5762
5763 if (!sve_access_check(s)) {
5764 return true;
5765 }
5766
5767 switch (a->esz) {
5768 case MO_32:
d28d12f0 5769 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5770 break;
5771 case MO_64:
d28d12f0 5772 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5773 break;
5774 }
5775 assert(fn != NULL);
5776
5777 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5778 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5779 return true;
5780}
5781
3a7be554 5782static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5783{
5784 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5785 bool be = s->be_data == MO_BE;
5786 bool mte = s->mte_active[0];
673e9fa6
RH
5787
5788 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5789 return false;
5790 }
5791 if (!sve_access_check(s)) {
5792 return true;
5793 }
5794
5795 switch (a->esz) {
5796 case MO_32:
d28d12f0 5797 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5798 break;
5799 case MO_64:
d28d12f0 5800 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5801 break;
5802 }
5803 assert(fn != NULL);
5804
5805 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5806 * by loading the immediate into the scalar parameter.
5807 */
2ccdf94f
RH
5808 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5809 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
5810 return true;
5811}
5812
cf327449
SL
5813static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
5814{
b17ab470
RH
5815 gen_helper_gvec_mem_scatter *fn = NULL;
5816 bool be = s->be_data == MO_BE;
5817 bool mte = s->mte_active[0];
5818
5819 if (a->esz < a->msz + !a->u) {
5820 return false;
5821 }
cf327449
SL
5822 if (!dc_isar_feature(aa64_sve2, s)) {
5823 return false;
5824 }
b17ab470
RH
5825 if (!sve_access_check(s)) {
5826 return true;
5827 }
5828
5829 switch (a->esz) {
5830 case MO_32:
5831 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
5832 break;
5833 case MO_64:
5834 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
5835 break;
5836 }
5837 assert(fn != NULL);
5838
5839 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5840 cpu_reg(s, a->rm), a->msz, false, fn);
5841 return true;
cf327449
SL
5842}
5843
d28d12f0
RH
5844/* Indexed by [mte][be][xs][msz]. */
5845static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5846 { /* MTE Inactive */
5847 { /* Little-endian */
5848 { gen_helper_sve_stbs_zsu,
5849 gen_helper_sve_sths_le_zsu,
5850 gen_helper_sve_stss_le_zsu, },
5851 { gen_helper_sve_stbs_zss,
5852 gen_helper_sve_sths_le_zss,
5853 gen_helper_sve_stss_le_zss, } },
5854 { /* Big-endian */
5855 { gen_helper_sve_stbs_zsu,
5856 gen_helper_sve_sths_be_zsu,
5857 gen_helper_sve_stss_be_zsu, },
5858 { gen_helper_sve_stbs_zss,
5859 gen_helper_sve_sths_be_zss,
5860 gen_helper_sve_stss_be_zss, } } },
5861 { /* MTE Active */
5862 { /* Little-endian */
5863 { gen_helper_sve_stbs_zsu_mte,
5864 gen_helper_sve_sths_le_zsu_mte,
5865 gen_helper_sve_stss_le_zsu_mte, },
5866 { gen_helper_sve_stbs_zss_mte,
5867 gen_helper_sve_sths_le_zss_mte,
5868 gen_helper_sve_stss_le_zss_mte, } },
5869 { /* Big-endian */
5870 { gen_helper_sve_stbs_zsu_mte,
5871 gen_helper_sve_sths_be_zsu_mte,
5872 gen_helper_sve_stss_be_zsu_mte, },
5873 { gen_helper_sve_stbs_zss_mte,
5874 gen_helper_sve_sths_be_zss_mte,
5875 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
5876};
5877
/*
 * Scatter-store helpers for 64-bit elements, indexed by [mte][be][xs][msz].
 * Note that we overload xs=2 to indicate 64-bit offset (zd helpers);
 * xs=0/1 select the zsu/zss 32-bit-offset variants.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
5935
/* ST1 scatter store, scalar base plus vector index (rn + zm[x]). */
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    /*
     * Reject element size smaller than the memory access size, and
     * scaled offsets for byte accesses (nothing to scale by).
     */
    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    /* Scaled offsets multiply the index by the access size (scale * msz). */
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}
dec6cf6b 5962
/* ST1 scatter store, vector base plus immediate offset (zn[x] + imm). */
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /*
     * xs index: 0 selects the unsigned 32-bit-offset helpers;
     * 2 is the overloaded "64-bit offset" slot (see table comment).
     */
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
    return true;
}
5993
6ebca45f
SL
/*
 * STNT1 scatter store, vector base plus scalar offset (SVE2 only).
 * The non-temporal hint requires no special handling here; the
 * addressing mirrors ST1_zpiz with the scalar register as the offset.
 */
static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* xs=0: unsigned 32-bit offsets; xs=2: overloaded 64-bit offsets. */
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    default:
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, fn);
    return true;
}
6025
dec6cf6b
RH
6026/*
6027 * Prefetches
6028 */
6029
3a7be554 6030static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
6031{
6032 /* Prefetch is a nop within QEMU. */
2f95a3b0 6033 (void)sve_access_check(s);
dec6cf6b
RH
6034 return true;
6035}
6036
3a7be554 6037static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
6038{
6039 if (a->rm == 31) {
6040 return false;
6041 }
6042 /* Prefetch is a nop within QEMU. */
2f95a3b0 6043 (void)sve_access_check(s);
dec6cf6b
RH
6044 return true;
6045}
a2103582
RH
6046
/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

/* Unpredicated MOVPRFX: emitted as a plain vector move. */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}

/* Merging MOVPRFX: select rn where predicated, keep rd elsewhere. */
static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
{
    return do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
}

/*
 * Zeroing MOVPRFX: predicated move that zeroes inactive elements.
 * NOTE(review): final 'false' argument semantics defined by
 * do_movz_zpz elsewhere in this file.
 */
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
5dad1ba5
RH
6075
/*
 * SVE2 Integer Multiply - Unpredicated
 */

TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)

/* High-half multiply helpers, indexed by element size (all sizes valid). */
static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

/* Polynomial multiply uses the single byte-sized helper. */
TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

/* Saturating doubling multiply high, indexed by element size. */
static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

/* Saturating rounding doubling multiply high. */
static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6112
d4b1e59d
RH
/*
 * SVE2 Integer - Predicated
 */

/*
 * Pairwise add-accumulate long; NULL for MO_8 since the result element
 * must be wider than the input.
 */
static gen_helper_gvec_4 * const sadlp_fns[4] = {
    NULL, gen_helper_sve2_sadalp_zpzz_h,
    gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
};
TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           sadlp_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const uadlp_fns[4] = {
    NULL, gen_helper_sve2_uadalp_zpzz_h,
    gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
};
TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           uadlp_fns[a->esz], a, 0)
db366da8
RH
6130
6131/*
6132 * SVE2 integer unary operations (predicated)
6133 */
6134
b2c00961
RH
/* URECPE/URSQRTE exist only for 32-bit elements; NULL rejects others. */
TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)

/* Saturating absolute value, indexed by element size. */
static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

/* Saturating negate, indexed by element size. */
static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
45d9503d 6152
5880bdc0
RH
/* SVE2 predicated two-source integer ops, expanded via DO_ZPZZ. */

/* Saturating / rounding shifts. */
DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

/* Halving add/sub (signed). */
DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

/* Halving add/sub (unsigned). */
DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

/* Pairwise operations. */
DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

/* Saturating add/sub. */
DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
0ce1dda8
RH
6181
6182/*
6183 * SVE2 Widening Integer Arithmetic
6184 */
6185
615f19fe
RH
/*
 * Widening ops: NULL for MO_8 because the input element must be
 * narrower than the result.  The final TRANS_FEAT data argument
 * selects which (bottom/top) halves of the two operands the helper
 * combines; see the helper implementations for the bit encoding.
 */
static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL, gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL, gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL, gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL, gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL, gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL, gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL, gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL, gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL, gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

/* Interleaved EOR; all element sizes valid. */
static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
2df3ca55 6279
e3a56131
RH
/*
 * Widening polynomial multiply.  esz == MO_8 produces a 128-bit result
 * and additionally requires the SVE2 PMULL128 feature; MO_32 has no
 * helper (NULL) and is rejected downstream.
 */
static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL, gen_helper_sve2_pmull_d,
    };
    if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}

/* sel chooses bottom (false) vs top (true) operand halves. */
TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
e3a56131 6294
615f19fe
RH
/*
 * Wide add/sub: wide first operand, narrow second.  NULL for MO_8;
 * data argument 0/1 selects bottom/top half of the narrow operand.
 */
static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL, gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL, gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL, gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL, gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
4269fef1
RH
6322
/*
 * Signed shift-left-long, vector form.  imm packs (shl << 1) | top:
 * 'top' selects the odd (high) input halves, 'shl' is the left shift
 * applied after widening.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            /* Shift by a full half-width reduces to masking in place. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Arithmetic shift down sign-extends the top half, then shift up. */
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        /* Move bottom half to the top, then sign-extend downward. */
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}

/*
 * Unsigned shift-left-long on a 64-bit lane: a single shift plus mask
 * implements widen + shift for both top and bottom selections.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    /* Negative when taking the top half: shift right instead. */
    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}

/* Per-size wrappers matching the GVecGen2i .fni8 signature. */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}

/* Unsigned shift-left-long, vector form; imm packed as for gen_sshll_vec. */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            /* Full half-width shift reduces to masking the top half. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            /* No shift: just mask off the top half. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
6409
/*
 * Common expansion for SSHLL/USHLL (shift-left-long, bottom/top).
 * sel selects the top halves; uns selects the unsigned variant.
 * The shift amount and sel are packed into the gvec immediate as
 * (imm << 1) | sel, matching gen_[su]shll_vec above.
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    /* ops[uns][esz]; only the unsigned forms have a 64-bit scalar fni8. */
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}

static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}

static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}

static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}

static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
cb9c33b8 6481
615f19fe
RH
/* SVE2 bit-permute instructions (FEAT_SVE_BitPerm). */
static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bgrp_fns[a->esz], a, 0)

/* Complex add; data 0/1 selects the rot90/rot270 variant. */
static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

/* Saturating complex add. */
static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)
38650638 6520
eeb4e84d
RH
/* Absolute-difference accumulate long; NULL rejects MO_8. */
static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL, gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL, gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)

/* Add/subtract with carry long (ADCLB/ADCLT, SBCLB/SBCLT share decode). */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}

TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e 6550
f2be26a5
RH
/* Shift-and-accumulate / shift-insert, via shared gen_gvec_* expanders. */
TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)

/* Absolute difference and accumulate. */
TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
5ff2838d
RH
6560
/*
 * Common expansion for the SVE2 narrowing extract group (SQXTN*, UQXTN*,
 * SQXTUN*).  a->esz indexes ops[], whose .vece is one size larger
 * (MO_16..MO_64); these forms take no shift, so imm must be 0.
 */
static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
                                   const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
        !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}
6576
/* Vector opcodes required by the signed saturating narrow expansions. */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTNB: clamp each element to the signed range of the half-width
 * type, then mask to the even (bottom) half positions.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}

/*
 * SQXTNT: clamp n in place, shift into the odd (top) half positions,
 * then merge with the existing even halves of d via bitsel.
 * Note that n is clobbered.
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
{
    /* load_dest: d is both source (bottom halves) and destination. */
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6656
/* Vector opcodes required by the unsigned saturating narrow expansions. */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

/*
 * UQXTNB: unsigned clamp to the half-width maximum; the umin both
 * saturates and clears the top half, so no separate mask is needed.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}

/*
 * UQXTNT: clamp n in place, shift into the top halves, merge with d.
 * Note that n is clobbered; t still holds the low-half mask for bitsel.
 */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
{
    /* load_dest: d is both source (bottom halves) and destination. */
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6725
/* Vector opcodes required by signed-to-unsigned saturating narrow. */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTUNB: clamp signed input to [0, half-width max]; the final umin
 * also clears the top half, so no separate mask is needed.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}

/*
 * SQXTUNT: clamp n in place, shift into the top halves, merge with d.
 * Note that n is clobbered.
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
{
    /* load_dest: d is both source (bottom halves) and destination. */
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6798
6799static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
6800 const GVecGen2i ops[3])
6801{
6802 if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
6803 return false;
6804 }
6805 assert(a->imm > 0 && a->imm <= (8 << a->esz));
6806 if (sve_access_check(s)) {
6807 unsigned vsz = vec_full_reg_size(s);
6808 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6809 vec_full_reg_offset(s, a->rn),
6810 vsz, vsz, a->imm, &ops[a->esz]);
6811 }
6812 return true;
6813}
6814
6815static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6816{
6817 int halfbits = 4 << vece;
6818 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6819
6820 tcg_gen_shri_i64(d, n, shr);
6821 tcg_gen_andi_i64(d, d, mask);
6822}
6823
6824static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6825{
6826 gen_shrnb_i64(MO_16, d, n, shr);
6827}
6828
6829static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6830{
6831 gen_shrnb_i64(MO_32, d, n, shr);
6832}
6833
6834static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6835{
6836 gen_shrnb_i64(MO_64, d, n, shr);
6837}
6838
/*
 * Vector SHRNB: same scheme as gen_shrnb_i64, using a host vector
 * shift and an AND against the replicated low-half mask.
 * Note the input temp n is clobbered, as is usual for these hooks.
 */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
6850
/*
 * SHRNB: shift right narrow, bottom.  The inline vector expansion
 * needs only shri_vec; hosts lacking it fall back to the .fno
 * out-of-line helpers, and 64-bit-only hosts use the .fni8 forms.
 */
static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnb16_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnb32_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnb64_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6873
/*
 * SHRNT expansion on one 64-bit lane: align bits [shr, shr+halfbits)
 * of each wide source element with its top half by shifting left by
 * (halfbits - shr), clear everything below, and merge with the
 * bottom halves already present in d.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}
6884
/* Size-specialized SHRNT expanders matching the GVecGen2i .fni8 hook.  */
static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    /*
     * Only one wide element per 64-bit lane: a single deposit of the
     * shifted value into d[63:32] does the shift-and-merge directly.
     */
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}
6900
/*
 * Vector SHRNT: the left shift lands the wanted source bits in each
 * top half; bitsel with the low-half mask t computes
 * d = (d & t) | (n & ~t), keeping d's bottom halves and discarding
 * any residual low bits of the shifted n.
 */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
6912
/*
 * SHRNT: shift right narrow, top.  All entries set .load_dest so the
 * even (bottom) half-elements of Zd are preserved across the merge.
 */
static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnt16_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnt32_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnt64_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6938
/*
 * RSHRNB: rounding shift right narrow, bottom.  No inline expansion
 * is provided; the rounding step is done by the out-of-line helpers.
 */
static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnb_h },
        { .fno = gen_helper_sve2_rshrnb_s },
        { .fno = gen_helper_sve2_rshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6948
/* RSHRNT: rounding shift right narrow, top; helpers only, as RSHRNB.  */
static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnt_h },
        { .fno = gen_helper_sve2_rshrnt_s },
        { .fno = gen_helper_sve2_rshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6958
/*
 * SQSHRUNB vector expansion: arithmetic shift right, then saturate
 * to the unsigned narrow range [0, 2^halfbits - 1] via smax with 0
 * and umin with the low-half mask.  The saturated result already
 * sits in the bottom halves with zero top halves.
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
6972
/* SQSHRUNB: signed saturating shift right unsigned narrow, bottom.  */
static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6994
/*
 * SQSHRUNT vector expansion: saturate as SQSHRUNB, then shift the
 * result into the top halves and merge.  At the bitsel, t still
 * holds the low-half mask, so d = (d & t) | (n & ~t) keeps d's
 * bottom halves and installs the shifted result in the tops.
 */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7010
/*
 * SQSHRUNT: signed saturating shift right unsigned narrow, top.
 * .load_dest preserves the even (bottom) half-elements of Zd.
 */
static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7036
/* SQRSHRUNB: rounding variant of SQSHRUNB; out-of-line helpers only.  */
static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunb_h },
        { .fno = gen_helper_sve2_sqrshrunb_s },
        { .fno = gen_helper_sve2_sqrshrunb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7046
/* SQRSHRUNT: rounding variant of SQSHRUNT; out-of-line helpers only.  */
static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunt_h },
        { .fno = gen_helper_sve2_sqrshrunt_s },
        { .fno = gen_helper_sve2_sqrshrunt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7056
743bb147
RH
/*
 * SQSHRNB vector expansion: arithmetic shift right, saturate to the
 * signed narrow range [min, max] with smax/smin, then keep only the
 * bottom halves (zeroing the odd destination elements).
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7074
/* SQSHRNB: signed saturating shift right narrow, bottom.  */
static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7096
/*
 * SQSHRNT vector expansion: saturate as SQSHRNB, shift the result
 * into the top halves, then bitsel with the low-half mask t:
 * d = (d & t) | (n & ~t) keeps d's bottom halves.
 */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7115
/*
 * SQSHRNT: signed saturating shift right narrow, top.
 * .load_dest preserves the even (bottom) half-elements of Zd.
 */
static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7141
/* SQRSHRNB: rounding variant of SQSHRNB; out-of-line helpers only.  */
static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnb_h },
        { .fno = gen_helper_sve2_sqrshrnb_s },
        { .fno = gen_helper_sve2_sqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7151
/* SQRSHRNT: rounding variant of SQSHRNT; out-of-line helpers only.  */
static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnt_h },
        { .fno = gen_helper_sve2_sqrshrnt_s },
        { .fno = gen_helper_sve2_sqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7161
c13418da
RH
/*
 * UQSHRNB vector expansion: logical shift right, then unsigned
 * saturate via umin against the narrow-width mask; the result lands
 * in the bottom halves with zero tops.
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7173
/* UQSHRNB: unsigned saturating shift right narrow, bottom.  */
static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7195
/*
 * UQSHRNT vector expansion: saturate as UQSHRNB, shift the result
 * into the top halves, then bitsel with the low-half mask t:
 * d = (d & t) | (n & ~t) keeps d's bottom halves.
 */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7209
/*
 * UQSHRNT: unsigned saturating shift right narrow, top.
 * .load_dest preserves the even (bottom) half-elements of Zd.
 */
static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7234
/* UQRSHRNB: rounding variant of UQSHRNB; out-of-line helpers only.  */
static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnb_h },
        { .fno = gen_helper_sve2_uqrshrnb_s },
        { .fno = gen_helper_sve2_uqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7244
/* UQRSHRNT: rounding variant of UQSHRNT; out-of-line helpers only.  */
static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnt_h },
        { .fno = gen_helper_sve2_uqrshrnt_s },
        { .fno = gen_helper_sve2_uqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
b87dbeeb 7254
/*
 * Expand an SVE2 narrowing add/sub-high instruction.  There is no
 * byte-sized form, so the MO_8 table entry is NULL; decode of
 * esz == 0 then fails via the NULL function pointer.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
    static gen_helper_gvec_3 * const name##_fns[4] = {                    \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                     \
               name##_fns[a->esz], a, 0)

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
ef75309b
RH
/* MATCH/NMATCH only exist for byte and halfword elements.  */
static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])

/* HISTCNT only exists for word and doubleword elements.  */
static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
           histcnt_fns[a->esz], a, 0)

/* HISTSEG only exists for byte elements.  */
TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
           a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7d47ac94 7291
b87dbeeb
SL
7292static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
7293 gen_helper_gvec_4_ptr *fn)
7294{
7295 if (!dc_isar_feature(aa64_sve2, s)) {
7296 return false;
7297 }
7298 return do_zpzz_fp(s, a, fn);
7299}
7300
/*
 * Expand an SVE2 predicated FP pairwise instruction.  FP elements
 * have no byte size, so the MO_8 table entry is NULL.
 */
#define DO_SVE2_ZPZZ_FP(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve2_##name##_zpzz_h, \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d \
    }; \
    return do_sve2_zpzz_fp(s, a, fns[a->esz]); \
}

DO_SVE2_ZPZZ_FP(FADDP, faddp)
DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
DO_SVE2_ZPZZ_FP(FMINP, fminp)
bfc9307e
RH
7316
/*
 * SVE Integer Multiply-Add (unpredicated)
 */

/*
 * Single/double FP matrix multiply-accumulate, gated on the F32MM
 * and F64MM features respectively; both use the default FP status.
 */
TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
           a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
           a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
4f26756b 7325
eeb4e84d
RH
/*
 * Widening saturating doubling multiply-add/subtract.  No byte form
 * exists, so the MO_8 entry is NULL.  The trailing data argument
 * distinguishes the B (0), T (3) and BT (2) operand-pair selections.
 */
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

/* Saturating rounding doubling multiply-add/sub: all element sizes.  */
static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)
45a32e80 7361
eeb4e84d
RH
/*
 * Widening integer multiply-add/subtract.  No byte form exists, so
 * the MO_8 entries are NULL.  The trailing data argument selects the
 * B (0) or T (1) source-element halves.
 */
static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)
d782d3ca 7397
5f425b92
RH
/* Complex integer multiply-add; the rotation is passed as data.  */
static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* Complex integer dot product: word and doubleword only.  */
static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* Saturating rounding doubling complex multiply-add.  */
static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
6a98cb2a 7417
8740d694
RH
/* USDOT: mixed-sign dot product; only the word-sized form exists.  */
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)

/* AESMC/AESIMC share one helper; a->decrypt selects the inverse.  */
TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
           gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)

/* Likewise AESE/AESD share gen_helper_crypto_aese via the data arg.  */
TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, false)
TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, true)

TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4ekey, a, 0)

TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
5c1b7226
RH
7435
7436static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
7437{
7438 if (!dc_isar_feature(aa64_sve2, s)) {
7439 return false;
7440 }
7441 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
7442}
7443
d29b17ca
RH
7444static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
7445{
7446 if (!dc_isar_feature(aa64_sve_bf16, s)) {
7447 return false;
7448 }
7449 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
7450}
7451
5c1b7226
RH
7452static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
7453{
7454 if (!dc_isar_feature(aa64_sve2, s)) {
7455 return false;
7456 }
7457 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
7458}
83c2523f
SL
7459
7460static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
7461{
7462 if (!dc_isar_feature(aa64_sve2, s)) {
7463 return false;
7464 }
7465 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
7466}
7467
7468static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
7469{
7470 if (!dc_isar_feature(aa64_sve2, s)) {
7471 return false;
7472 }
7473 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
7474}
95365277
SL
7475
7476static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
7477{
7478 if (!dc_isar_feature(aa64_sve2, s)) {
7479 return false;
7480 }
7481 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
7482}
7483
7484static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
7485{
7486 if (!dc_isar_feature(aa64_sve2, s)) {
7487 return false;
7488 }
7489 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
7490}
631be02e
SL
7491
7492static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
7493{
7494 static gen_helper_gvec_3_ptr * const fns[] = {
7495 NULL, gen_helper_flogb_h,
7496 gen_helper_flogb_s, gen_helper_flogb_d
7497 };
7498
7499 if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
7500 return false;
7501 }
7502 if (sve_access_check(s)) {
7503 TCGv_ptr status =
7504 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7505 unsigned vsz = vec_full_reg_size(s);
7506
7507 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
7508 vec_full_reg_offset(s, a->rn),
7509 pred_full_reg_offset(s, a->pg),
7510 status, vsz, vsz, 0, fns[a->esz]);
7511 tcg_temp_free_ptr(status);
7512 }
7513 return true;
7514}
50d102bd
SL
7515
7516static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
7517{
7518 if (!dc_isar_feature(aa64_sve2, s)) {
7519 return false;
7520 }
41bf9b67
RH
7521 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
7522 a->rd, a->rn, a->rm, a->ra,
7523 (sel << 1) | sub, cpu_env);
50d102bd
SL
7524}
7525
/* B/T and add/sub front ends for the vector FMLAL group.  */
static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, false);
}

static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, true);
}

static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, false);
}

static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, true);
}
7545
7546static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
7547{
7548 if (!dc_isar_feature(aa64_sve2, s)) {
7549 return false;
7550 }
41bf9b67
RH
7551 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
7552 a->rd, a->rn, a->rm, a->ra,
7553 (a->index << 2) | (sel << 1) | sub, cpu_env);
50d102bd
SL
7554}
7555
/* B/T and add/sub front ends for the indexed FMLAL group.  */
static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, false);
}

static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, true);
}

static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, false);
}

static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, true);
}
2323c5ff 7575
eec05e4e
RH
/* 8-bit integer matrix multiply-accumulate variants (I8MM).  */
TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_ummla_b, a, 0)

/* BFloat16 dot product, vector and indexed forms.  */
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

/* BFloat16 matrix multiply-accumulate.  */
TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfmmla, a, 0)
5693887f
RH
7590
7591static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7592{
7593 if (!dc_isar_feature(aa64_sve_bf16, s)) {
7594 return false;
7595 }
41bf9b67
RH
7596 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
7597 a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
5693887f
RH
7598}
7599
/* B/T front ends for the vector BFMLAL form.  */
static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, false);
}

static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, true);
}
458d0ab6
RH
7609
7610static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
7611{
7612 if (!dc_isar_feature(aa64_sve_bf16, s)) {
7613 return false;
7614 }
41bf9b67
RH
7615 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
7616 a->rd, a->rn, a->rm, a->ra,
7617 (a->index << 1) | sel, FPST_FPCR);
458d0ab6
RH
7618}
7619
/* B/T front ends for the indexed BFMLAL form.  */
static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, false);
}

static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, true);
}