]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Use tcg_constant for trans_INDEX_*
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    /* Element size is the position of the highest set tsz bit. */
    return 31 - clz32(x);
}

/* See e.g. LSR (immediate, predicated): count = 2*esize - encoding. */
static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    /* The unsigned same-size dtypes are 0, 5, 10, 15 for msz 0..3. */
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
116
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

/* Predicate register size rounded up for use with the gvec expanders. */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
138
/* Invoke an out-of-line helper on 2 Zregs.
 * 'data' is passed to the helper via the simd descriptor.
 */
static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 3 Zregs. */
static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 4 Zregs. */
static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, data, fn);
}
f7d79c41 194
/* Invoke a vector expander on two Zregs. */
static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
                           int esz, int rd, int rn)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn), vsz, vsz);
}

/* Invoke a vector expander on three Zregs. */
static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vsz, vsz);
}

/* Invoke a vector expander on four Zregs. */
static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                             int esz, int rd, int rn, int rm, int ra)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm),
            vec_full_reg_offset(s, ra), vsz, vsz);
}
224
39eea561
RH
225/* Invoke a vector move on two Zregs. */
226static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 227{
f7d79c41
RH
228 if (sve_access_check(s)) {
229 gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
230 }
231 return true;
38388f7e
RH
232}
233
/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}
240
/* Invoke a vector expander on three Pregs.
 * Predicate operations use the rounded-up gvec size (see
 * pred_gvec_reg_size); the pad bits are zero in both inputs.
 */
static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    unsigned psz = pred_gvec_reg_size(s);
    gvec_fn(MO_64, pred_full_reg_offset(s, rd),
            pred_full_reg_offset(s, rn),
            pred_full_reg_offset(s, rm), psz, psz);
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    /* The insn is recognized either way. */
    return true;
}
261
/* Set the cpu flags as per a return from an SVE helper.
 * The helper result encodes Z in bit 1 and C in bit 0; the raw
 * value feeds NF directly and V is always cleared.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
280
/* PredTest over 'words' predicate words at env offsets dofs (value)
 * and gofs (governing predicate), setting NZCV.
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    /* t carries the word count in and the flags result out, so it
     * must be a mutable temp -- not a tcg constant.
     */
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
298
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
304
/*
 *** SVE Logical - Unpredicated Group
 */

/* Expand an unpredicated three-operand Zreg insn via a gvec expander. */
static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
{
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}
316
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_and);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_or);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}
d1822297 336
/* Per-byte rotate-right of (n ^ m) by sh, within a 64-bit lane.
 * There is no 8-bit rotate op, so build it from shifts and masks.
 */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

/* As above, for 16-bit elements. */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}
364
/* d = rotr(n ^ m, sh) for 32-bit elements. */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

/* d = rotr(n ^ m, sh) for 64-bit elements. */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

/* Vector form of the same, for any element size. */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
383
/* Expand XAR: per element, d = rotr(n ^ m, shift). */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        /* The 64-bit helper is shared with AdvSIMD (gvec_ rather
         * than sve2_ prefix).
         */
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
426
static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    /* esz < 0 marks an invalid tsz encoding; XAR is SVE2-only. */
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}
440
/* Expand an SVE2 four-operand Zreg insn via a gvec expander. */
static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
    }
    return true;
}
451
/* EOR3: d = n ^ m ^ k. */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* Bitwise op: element size is irrelevant, so fix vece at MO_64. */
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_eor3);
}
482
/* BCAX: d = n ^ (m & ~k). */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* Bitwise op: element size is irrelevant, so fix vece at MO_64. */
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bcax);
}
513
514static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
515 uint32_t a, uint32_t oprsz, uint32_t maxsz)
516{
517 /* BSL differs from the generic bitsel in argument ordering. */
518 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
519}
520
521static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
522{
523 return do_sve2_zzzz_fn(s, a, gen_bsl);
524}
525
/* BSL1N: d = (~n & k) | (m & ~k), i.e. BSL with the first operand
 * inverted.  Note that n and m are clobbered as scratch.
 */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl1n);
}
563
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl2n);
}
610
/* NBSL: d = ~((n & k) | (m & ~k)), i.e. inverted bitsel. */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_nbsl);
}
642
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_add);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}
676
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Expand a binary predicated Zreg insn via an out-of-line helper.
 * A NULL fn marks an unsupported element size.
 */
static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
    }
    return true;
}
691
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
703
/* Expand a binary predicated insn, selecting the helper by element size. */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    /* SDIV is only defined for 32-bit and 64-bit elements. */
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    /* UDIV is only defined for 32-bit and 64-bit elements. */
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
752
static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ
762
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

/* Expand a unary predicated Zreg insn via an out-of-line helper.
 * A NULL fn marks an unsupported element size.
 */
static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
    }
    return true;
}
777
/* Expand a unary predicated insn, selecting the helper by element size. */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
795
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    /* No byte-sized FP elements: esz 0 slot is NULL. */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    /* No byte-sized FP elements: esz 0 slot is NULL. */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
817
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    /* Sign-extend byte: the destination must be wider than a byte. */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    /* Zero-extend byte: the destination must be wider than a byte. */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
839
static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    /* Sign-extend halfword: destination must be wider than 16 bits. */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    /* Zero-extend halfword: destination must be wider than 16 bits. */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
859
static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    /* Sign-extend word: only valid into 64-bit elements. */
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    /* Zero-extend word: only valid into 64-bit elements. */
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
871
047cec97
RH
872/*
873 *** SVE Integer Reduction Group
874 */
875
876typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
877static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
878 gen_helper_gvec_reduc *fn)
879{
880 unsigned vsz = vec_full_reg_size(s);
881 TCGv_ptr t_zn, t_pg;
882 TCGv_i32 desc;
883 TCGv_i64 temp;
884
885 if (fn == NULL) {
886 return false;
887 }
888 if (!sve_access_check(s)) {
889 return true;
890 }
891
892 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
893 temp = tcg_temp_new_i64();
894 t_zn = tcg_temp_new_ptr();
895 t_pg = tcg_temp_new_ptr();
896
897 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
898 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
899 fn(temp, t_zn, t_pg, desc);
900 tcg_temp_free_ptr(t_zn);
901 tcg_temp_free_ptr(t_pg);
902 tcg_temp_free_i32(desc);
903
904 write_fp_dreg(s, a->rd, temp);
905 tcg_temp_free_i64(temp);
906 return true;
907}
908
/* Expand a reduction over active elements, selecting the helper by
 * element size.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)             \
{                                                                     \
    static gen_helper_gvec_reduc * const fns[4] = {                   \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,         \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,         \
    };                                                                \
    return do_vpz_ool(s, a, fns[a->esz]);                             \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)
928
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    /* SADDV sums into a 64-bit accumulator, so 64-bit elements are
     * unallocated (the NULL slot).
     */
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
939
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };

    if (sve_access_check(s)) {
        /* 'invert' is passed through the helper's data field. */
        gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
    }
    return true;
}
961
/* Expand a predicated shift-by-immediate via an out-of-line helper;
 * the immediate is passed through the helper's data field.
 */
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
    }
    return true;
}
970
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}
986
static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
1004
static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
1022
static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
1040
/* SVE2 saturating/rounding shifts by immediate, predicated. */
static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
        gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
        gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
        gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
        gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
        gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}
1100
fe7f8dfb
RH
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/*
 * Expand a trans function for a predicated shift by a wide (64-bit)
 * element shift count.  Only B/H/S element sizes have wide-shift
 * helpers, hence the 3-entry table and the esz >= 3 rejection;
 * esz < 0 is an invalid encoding.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1123
d9d78dcc
RH
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

/*
 * Expand an unpredicated shift-by-immediate using an inline gvec
 * expansion.  @asr is true for arithmetic right shift, for which a
 * shift count equal to the element size is clamped to one less;
 * for logical shifts that count zeroes the destination instead.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
1154
/* ASR (immediate, unpredicated). */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

/* LSR (immediate, unpredicated). */
static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

/* LSL (immediate, unpredicated). */
static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
1169
1170static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
1171{
1172 if (fn == NULL) {
1173 return false;
1174 }
1175 if (sve_access_check(s)) {
e645d1a1 1176 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
d9d78dcc
RH
1177 }
1178 return true;
1179}
1180
/*
 * Expand a trans function for an unpredicated shift by a wide (64-bit)
 * element count.  fns[3] is NULL: there is no 64-bit element form,
 * and do_zzw_ool rejects it.
 */
#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
{                                                                  \
    static gen_helper_gvec_3 * const fns[4] = {                    \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
        gen_helper_sve_##name##_zzw_s, NULL                        \
    };                                                             \
    return do_zzw_ool(s, a, fns[a->esz]);                          \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
1196
96a36e4a
RH
/*
 *** SVE Integer Multiply-Add Group
 */

/*
 * Expand a predicated 5-operand (Zd, Za, Zn, Zm, Pg) operation via
 * an out-of-line helper.
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand a trans function across all four element sizes. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
1230
9a56c9c3
RH
1231/*
1232 *** SVE Index Generation Group
1233 */
1234
1235static void do_index(DisasContext *s, int esz, int rd,
1236 TCGv_i64 start, TCGv_i64 incr)
1237{
1238 unsigned vsz = vec_full_reg_size(s);
1239 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1240 TCGv_ptr t_zd = tcg_temp_new_ptr();
1241
1242 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1243 if (esz == 3) {
1244 gen_helper_sve_index_d(t_zd, start, incr, desc);
1245 } else {
1246 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
1247 static index_fn * const fns[3] = {
1248 gen_helper_sve_index_b,
1249 gen_helper_sve_index_h,
1250 gen_helper_sve_index_s,
1251 };
1252 TCGv_i32 s32 = tcg_temp_new_i32();
1253 TCGv_i32 i32 = tcg_temp_new_i32();
1254
1255 tcg_gen_extrl_i64_i32(s32, start);
1256 tcg_gen_extrl_i64_i32(i32, incr);
1257 fns[esz](t_zd, s32, i32, desc);
1258
1259 tcg_temp_free_i32(s32);
1260 tcg_temp_free_i32(i32);
1261 }
1262 tcg_temp_free_ptr(t_zd);
1263 tcg_temp_free_i32(desc);
1264}
1265
3a7be554 1266static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
9a56c9c3
RH
1267{
1268 if (sve_access_check(s)) {
b0c3aece
RH
1269 TCGv_i64 start = tcg_constant_i64(a->imm1);
1270 TCGv_i64 incr = tcg_constant_i64(a->imm2);
9a56c9c3 1271 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1272 }
1273 return true;
1274}
1275
3a7be554 1276static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
9a56c9c3
RH
1277{
1278 if (sve_access_check(s)) {
b0c3aece 1279 TCGv_i64 start = tcg_constant_i64(a->imm);
9a56c9c3
RH
1280 TCGv_i64 incr = cpu_reg(s, a->rm);
1281 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1282 }
1283 return true;
1284}
1285
3a7be554 1286static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
9a56c9c3
RH
1287{
1288 if (sve_access_check(s)) {
1289 TCGv_i64 start = cpu_reg(s, a->rn);
b0c3aece 1290 TCGv_i64 incr = tcg_constant_i64(a->imm);
9a56c9c3 1291 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1292 }
1293 return true;
1294}
1295
3a7be554 1296static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
9a56c9c3
RH
1297{
1298 if (sve_access_check(s)) {
1299 TCGv_i64 start = cpu_reg(s, a->rn);
1300 TCGv_i64 incr = cpu_reg(s, a->rm);
1301 do_index(s, a->esz, a->rd, start, incr);
1302 }
1303 return true;
1304}
1305
96f922cc
RH
1306/*
1307 *** SVE Stack Allocation Group
1308 */
1309
3a7be554 1310static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1311{
5de56742
AC
1312 if (sve_access_check(s)) {
1313 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1314 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1315 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1316 }
96f922cc
RH
1317 return true;
1318}
1319
3a7be554 1320static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1321{
5de56742
AC
1322 if (sve_access_check(s)) {
1323 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1324 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1325 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1326 }
96f922cc
RH
1327 return true;
1328}
1329
3a7be554 1330static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1331{
5de56742
AC
1332 if (sve_access_check(s)) {
1333 TCGv_i64 reg = cpu_reg(s, a->rd);
1334 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1335 }
96f922cc
RH
1336 return true;
1337}
1338
4b242d9c
RH
/*
 *** SVE Compute Vector Address Group
 */

/* Expand an ADR variant via an out-of-line helper; a->imm selects
 * the element shift amount passed in the descriptor. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
    }
    return true;
}

/* ADR, packed 32-bit offsets. */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

/* ADR, packed 64-bit offsets. */
static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

/* ADR, sign-extended 32-bit offsets in 64-bit elements. */
static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

/* ADR, zero-extended 32-bit offsets in 64-bit elements. */
static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1370
0762cd42
RH
1371/*
1372 *** SVE Integer Misc - Unpredicated Group
1373 */
1374
3a7be554 1375static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
0762cd42
RH
1376{
1377 static gen_helper_gvec_2 * const fns[4] = {
1378 NULL,
1379 gen_helper_sve_fexpa_h,
1380 gen_helper_sve_fexpa_s,
1381 gen_helper_sve_fexpa_d,
1382 };
1383 if (a->esz == 0) {
1384 return false;
1385 }
1386 if (sve_access_check(s)) {
40e32e5a 1387 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
0762cd42
RH
1388 }
1389 return true;
1390}
1391
3a7be554 1392static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
a1f233f2
RH
1393{
1394 static gen_helper_gvec_3 * const fns[4] = {
1395 NULL,
1396 gen_helper_sve_ftssel_h,
1397 gen_helper_sve_ftssel_s,
1398 gen_helper_sve_ftssel_d,
1399 };
1400 if (a->esz == 0) {
1401 return false;
1402 }
1403 if (sve_access_check(s)) {
e645d1a1 1404 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
a1f233f2
RH
1405 }
1406 return true;
1407}
1408
516e246a
RH
/*
 *** SVE Predicate Logical Operations Group
 */

/*
 * Expand a predicate logical operation, optionally setting the flags
 * (the setflags ".s" form).  Without flags this is a plain gvec
 * expansion.  With flags: an 8-byte predicate is computed and tested
 * inline on i64 temps; larger predicates use the gvec expansion
 * followed by a full predicate test, preserving the governing
 * predicate in a scratch slot if the destination overwrites it.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1468
/* pd = pn & pm & pg, on i64 lanes. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* pd = pn & pm & pg, on host vectors. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Special-case the non-setflags form where an operand repeats:
     * rn & rn & pg reduces to rn & pg, and further to just rn when
     * pg == rn; if pg equals either source, the pg term is redundant.
     */
    if (!a->s) {
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                do_mov_p(s, a->rd, a->rn);
            } else {
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
1509
/* pd = (pn & ~pm) & pg, on i64 lanes. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* pd = (pn & ~pm) & pg, on host vectors. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* With pg == rn and no flags, (rn & ~rm) & rn == rn & ~rm. */
    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}
1540
/* pd = (pn ^ pm) & pg, on i64 lanes. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* pd = (pn ^ pm) & pg, on host vectors. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1564
3a7be554 1565static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1566{
516e246a
RH
1567 if (a->s) {
1568 return false;
516e246a 1569 }
d4bc6232
RH
1570 if (sve_access_check(s)) {
1571 unsigned psz = pred_gvec_reg_size(s);
1572 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1573 pred_full_reg_offset(s, a->pg),
1574 pred_full_reg_offset(s, a->rn),
1575 pred_full_reg_offset(s, a->rm), psz, psz);
1576 }
1577 return true;
516e246a
RH
1578}
1579
/* pd = (pn | pm) & pg, on i64 lanes. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* pd = (pn | pm) & pg, on host vectors. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* With pg == rn == rm and no flags, (rn | rn) & rn == rn. */
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1607
/* pd = (pn | ~pm) & pg, on i64 lanes. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* pd = (pn | ~pm) & pg, on host vectors. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1631
/* pd = pg & ~(pn | pm), on i64 lanes. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* pd = pg & ~(pn | pm), on host vectors. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1655
/* pd = pg & ~(pn & pm), on i64 lanes. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* pd = pg & ~(pn & pm), on host vectors. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1679
9e18d7a6
RH
/*
 *** SVE Predicate Misc Group
 */

/*
 * PTEST: test predicate Pn against governing predicate Pg.
 * A single 64-bit word is tested inline via do_predtest1; larger
 * predicates go through do_predtest.
 */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1707
028e2a7b
RH
/* See the ARM pseudocode DecodePredCount.
 * Returns the number of active elements for the given pattern, or 0
 * when the pattern cannot be satisfied (or is the #uimm5 catch-all).
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    /* Fixed VLn patterns are all-or-nothing. */
    return elements >= bound ? bound : 0;
}
1745
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        /* Mask off the bits of a partial final 64-bit word. */
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The entire predicate fits in one 64-bit word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* No partial final word: try a single gvec dup of the
           repeating word over the set region. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Store the repeating word, then any partial final word,
       then zero the remainder of the predicate register. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        /* NZCV reflect whether any elements were set (word != 0). */
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1825
/* PTRUE, PTRUES (when a->s is set). */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}
1842
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR (unpredicated): copy the FFR into Pd. */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

/* WRFFR: copy Pn into the FFR. */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1864
1865static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1866 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1867 TCGv_ptr, TCGv_i32))
1868{
1869 if (!sve_access_check(s)) {
1870 return true;
1871 }
1872
1873 TCGv_ptr t_pd = tcg_temp_new_ptr();
1874 TCGv_ptr t_pg = tcg_temp_new_ptr();
1875 TCGv_i32 t;
86300b5d 1876 unsigned desc = 0;
028e2a7b 1877
86300b5d
RH
1878 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
1879 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
028e2a7b
RH
1880
1881 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1882 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1883 t = tcg_const_i32(desc);
1884
1885 gen_fn(t, t_pd, t_pg, t);
1886 tcg_temp_free_ptr(t_pd);
1887 tcg_temp_free_ptr(t_pg);
1888
1889 do_pred_flags(t);
1890 tcg_temp_free_i32(t);
1891 return true;
1892}
1893
/* PFIRST: set the first active element of Pd. */
static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

/* PNEXT: advance to the next active element of Pd. */
static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1903
24e82e68
RH
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 * The result is left zero- or sign-extended in the 64-bit register.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1932
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: on borrow (reg < val) clamp to 0. */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: on carry (sum < reg) clamp to UINT64_MAX. */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1978
1979/* Similarly with a vector and a scalar operand. */
1980static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1981 TCGv_i64 val, bool u, bool d)
1982{
1983 unsigned vsz = vec_full_reg_size(s);
1984 TCGv_ptr dptr, nptr;
1985 TCGv_i32 t32, desc;
1986 TCGv_i64 t64;
1987
1988 dptr = tcg_temp_new_ptr();
1989 nptr = tcg_temp_new_ptr();
1990 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1991 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1992 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1993
1994 switch (esz) {
1995 case MO_8:
1996 t32 = tcg_temp_new_i32();
1997 tcg_gen_extrl_i64_i32(t32, val);
1998 if (d) {
1999 tcg_gen_neg_i32(t32, t32);
2000 }
2001 if (u) {
2002 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
2003 } else {
2004 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
2005 }
2006 tcg_temp_free_i32(t32);
2007 break;
2008
2009 case MO_16:
2010 t32 = tcg_temp_new_i32();
2011 tcg_gen_extrl_i64_i32(t32, val);
2012 if (d) {
2013 tcg_gen_neg_i32(t32, t32);
2014 }
2015 if (u) {
2016 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
2017 } else {
2018 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
2019 }
2020 tcg_temp_free_i32(t32);
2021 break;
2022
2023 case MO_32:
2024 t64 = tcg_temp_new_i64();
2025 if (d) {
2026 tcg_gen_neg_i64(t64, val);
2027 } else {
2028 tcg_gen_mov_i64(t64, val);
2029 }
2030 if (u) {
2031 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
2032 } else {
2033 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
2034 }
2035 tcg_temp_free_i64(t64);
2036 break;
2037
2038 case MO_64:
2039 if (u) {
2040 if (d) {
2041 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
2042 } else {
2043 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
2044 }
2045 } else if (d) {
2046 t64 = tcg_temp_new_i64();
2047 tcg_gen_neg_i64(t64, val);
2048 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
2049 tcg_temp_free_i64(t64);
2050 } else {
2051 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
2052 }
2053 break;
2054
2055 default:
2056 g_assert_not_reached();
2057 }
2058
2059 tcg_temp_free_ptr(dptr);
2060 tcg_temp_free_ptr(nptr);
2061 tcg_temp_free_i32(desc);
2062}
2063
3a7be554 2064static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
2065{
2066 if (sve_access_check(s)) {
2067 unsigned fullsz = vec_full_reg_size(s);
2068 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2069 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2070 }
2071 return true;
2072}
2073
3a7be554 2074static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2075{
2076 if (sve_access_check(s)) {
2077 unsigned fullsz = vec_full_reg_size(s);
2078 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2079 int inc = numelem * a->imm * (a->d ? -1 : 1);
2080 TCGv_i64 reg = cpu_reg(s, a->rd);
2081
2082 tcg_gen_addi_i64(reg, reg, inc);
2083 }
2084 return true;
2085}
2086
3a7be554 2087static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2088{
2089 if (!sve_access_check(s)) {
2090 return true;
2091 }
2092
2093 unsigned fullsz = vec_full_reg_size(s);
2094 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2095 int inc = numelem * a->imm;
2096 TCGv_i64 reg = cpu_reg(s, a->rd);
2097
2098 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
2099 if (inc == 0) {
2100 if (a->u) {
2101 tcg_gen_ext32u_i64(reg, reg);
2102 } else {
2103 tcg_gen_ext32s_i64(reg, reg);
2104 }
2105 } else {
2106 TCGv_i64 t = tcg_const_i64(inc);
2107 do_sat_addsub_32(reg, t, a->u, a->d);
2108 tcg_temp_free_i64(t);
2109 }
2110 return true;
2111}
2112
3a7be554 2113static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2114{
2115 if (!sve_access_check(s)) {
2116 return true;
2117 }
2118
2119 unsigned fullsz = vec_full_reg_size(s);
2120 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2121 int inc = numelem * a->imm;
2122 TCGv_i64 reg = cpu_reg(s, a->rd);
2123
2124 if (inc != 0) {
2125 TCGv_i64 t = tcg_const_i64(inc);
2126 do_sat_addsub_64(reg, t, a->u, a->d);
2127 tcg_temp_free_i64(t);
2128 }
2129 return true;
2130}
2131
3a7be554 2132static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2133{
2134 if (a->esz == 0) {
2135 return false;
2136 }
2137
2138 unsigned fullsz = vec_full_reg_size(s);
2139 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2140 int inc = numelem * a->imm;
2141
2142 if (inc != 0) {
2143 if (sve_access_check(s)) {
2144 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
2145 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
2146 vec_full_reg_offset(s, a->rn),
2147 t, fullsz, fullsz);
2148 tcg_temp_free_i64(t);
2149 }
2150 } else {
2151 do_mov_z(s, a->rd, a->rn);
2152 }
2153 return true;
2154}
2155
3a7be554 2156static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2157{
2158 if (a->esz == 0) {
2159 return false;
2160 }
2161
2162 unsigned fullsz = vec_full_reg_size(s);
2163 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2164 int inc = numelem * a->imm;
2165
2166 if (inc != 0) {
2167 if (sve_access_check(s)) {
2168 TCGv_i64 t = tcg_const_i64(inc);
2169 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
2170 tcg_temp_free_i64(t);
2171 }
2172 } else {
2173 do_mov_z(s, a->rd, a->rn);
2174 }
2175 return true;
2176}
2177
e1fa1164
RH
/*
 *** SVE Bitwise Immediate Group
 */

/*
 * Expand a bitwise operation with a logical-immediate (dbm-encoded)
 * operand.  Returns false if the dbm field does not decode to a valid
 * bitmask.  The expansion always operates on 64-bit lanes since the
 * decoded mask is replicated to 64 bits.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}
2197
3a7be554 2198static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2199{
2200 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
2201}
2202
3a7be554 2203static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2204{
2205 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
2206}
2207
3a7be554 2208static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2209{
2210 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
2211}
2212
3a7be554 2213static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2214{
2215 uint64_t imm;
2216 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2217 extract32(a->dbm, 0, 6),
2218 extract32(a->dbm, 6, 6))) {
2219 return false;
2220 }
2221 if (sve_access_check(s)) {
2222 do_dupi_z(s, a->rd, imm);
2223 }
2224 return true;
2225}
2226
f25a2361
RH
2227/*
2228 *** SVE Integer Wide Immediate - Predicated Group
2229 */
2230
2231/* Implement all merging copies. This is used for CPY (immediate),
2232 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2233 */
2234static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2235 TCGv_i64 val)
2236{
2237 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2238 static gen_cpy * const fns[4] = {
2239 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2240 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2241 };
2242 unsigned vsz = vec_full_reg_size(s);
2243 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2244 TCGv_ptr t_zd = tcg_temp_new_ptr();
2245 TCGv_ptr t_zn = tcg_temp_new_ptr();
2246 TCGv_ptr t_pg = tcg_temp_new_ptr();
2247
2248 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2249 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2250 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2251
2252 fns[esz](t_zd, t_zn, t_pg, val, desc);
2253
2254 tcg_temp_free_ptr(t_zd);
2255 tcg_temp_free_ptr(t_zn);
2256 tcg_temp_free_ptr(t_pg);
2257 tcg_temp_free_i32(desc);
2258}
2259
3a7be554 2260static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361
RH
2261{
2262 if (a->esz == 0) {
2263 return false;
2264 }
2265 if (sve_access_check(s)) {
2266 /* Decode the VFP immediate. */
2267 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
2268 TCGv_i64 t_imm = tcg_const_i64(imm);
2269 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
2270 tcg_temp_free_i64(t_imm);
2271 }
2272 return true;
2273}
2274
3a7be554 2275static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 2276{
3a7be554 2277 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
2278 return false;
2279 }
2280 if (sve_access_check(s)) {
2281 TCGv_i64 t_imm = tcg_const_i64(a->imm);
2282 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
2283 tcg_temp_free_i64(t_imm);
2284 }
2285 return true;
2286}
2287
3a7be554 2288static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
f25a2361
RH
2289{
2290 static gen_helper_gvec_2i * const fns[4] = {
2291 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2292 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2293 };
2294
3a7be554 2295 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
2296 return false;
2297 }
2298 if (sve_access_check(s)) {
2299 unsigned vsz = vec_full_reg_size(s);
2300 TCGv_i64 t_imm = tcg_const_i64(a->imm);
2301 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2302 pred_full_reg_offset(s, a->pg),
2303 t_imm, vsz, vsz, 0, fns[a->esz]);
2304 tcg_temp_free_i64(t_imm);
2305 }
2306 return true;
2307}
2308
b94f8f60
RH
2309/*
2310 *** SVE Permute Extract Group
2311 */
2312
/*
 * Expand EXT: Zd = extract vsz bytes from the concatenation Zm:Zn
 * starting at byte IMM of Zn.  Shared by the SVE1 (trans_EXT) and
 * SVE2 constructive (trans_EXT_sve2) forms.
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    /* An index at or beyond the vector length selects all of Zn. */
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            /* Low n_ofs bytes of Zm fill the top of the result. */
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Otherwise fall back to the out-of-line byte-shuffle helper. */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2342
75114792
SL
2343static bool trans_EXT(DisasContext *s, arg_EXT *a)
2344{
2345 return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
2346}
2347
2348static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
2349{
2350 if (!dc_isar_feature(aa64_sve2, s)) {
2351 return false;
2352 }
2353 return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
2354}
2355
30562ab7
RH
2356/*
2357 *** SVE Permute - Unpredicated Group
2358 */
2359
3a7be554 2360static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2361{
2362 if (sve_access_check(s)) {
2363 unsigned vsz = vec_full_reg_size(s);
2364 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2365 vsz, vsz, cpu_reg_sp(s, a->rn));
2366 }
2367 return true;
2368}
2369
/*
 * DUP (element): broadcast element [index] of Zn into Zd.
 * The element size and index are jointly encoded in imm.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    /* All-zero low bits would give no set bit for ctz32: unallocated. */
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* Lowest set bit of imm encodes esz; the bits above it, the index. */
        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2396
2397static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2398{
2399 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2400 static gen_insr * const fns[4] = {
2401 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2402 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2403 };
2404 unsigned vsz = vec_full_reg_size(s);
2405 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2406 TCGv_ptr t_zd = tcg_temp_new_ptr();
2407 TCGv_ptr t_zn = tcg_temp_new_ptr();
2408
2409 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2410 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2411
2412 fns[a->esz](t_zd, t_zn, val, desc);
2413
2414 tcg_temp_free_ptr(t_zd);
2415 tcg_temp_free_ptr(t_zn);
2416 tcg_temp_free_i32(desc);
2417}
2418
3a7be554 2419static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2420{
2421 if (sve_access_check(s)) {
2422 TCGv_i64 t = tcg_temp_new_i64();
2423 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2424 do_insr_i64(s, a, t);
2425 tcg_temp_free_i64(t);
2426 }
2427 return true;
2428}
2429
3a7be554 2430static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2431{
2432 if (sve_access_check(s)) {
2433 do_insr_i64(s, a, cpu_reg(s, a->rm));
2434 }
2435 return true;
2436}
2437
3a7be554 2438static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
30562ab7
RH
2439{
2440 static gen_helper_gvec_2 * const fns[4] = {
2441 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2442 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2443 };
2444
2445 if (sve_access_check(s)) {
40e32e5a 2446 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
30562ab7
RH
2447 }
2448 return true;
2449}
2450
3a7be554 2451static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2452{
2453 static gen_helper_gvec_3 * const fns[4] = {
2454 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2455 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2456 };
2457
2458 if (sve_access_check(s)) {
e645d1a1 2459 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
30562ab7
RH
2460 }
2461 return true;
2462}
2463
80a712a2
SL
2464static bool trans_TBL_sve2(DisasContext *s, arg_rrr_esz *a)
2465{
2466 static gen_helper_gvec_4 * const fns[4] = {
2467 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2468 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2469 };
2470
2471 if (!dc_isar_feature(aa64_sve2, s)) {
2472 return false;
2473 }
2474 if (sve_access_check(s)) {
2475 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn,
2476 (a->rn + 1) % 32, a->rm, 0);
2477 }
2478 return true;
2479}
2480
2481static bool trans_TBX(DisasContext *s, arg_rrr_esz *a)
2482{
2483 static gen_helper_gvec_3 * const fns[4] = {
2484 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2485 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2486 };
2487
2488 if (!dc_isar_feature(aa64_sve2, s)) {
2489 return false;
2490 }
2491 if (sve_access_check(s)) {
2492 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
2493 }
2494 return true;
2495}
2496
3a7be554 2497static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2498{
2499 static gen_helper_gvec_2 * const fns[4][2] = {
2500 { NULL, NULL },
2501 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2502 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2503 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2504 };
2505
2506 if (a->esz == 0) {
2507 return false;
2508 }
2509 if (sve_access_check(s)) {
2510 unsigned vsz = vec_full_reg_size(s);
2511 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2512 vec_full_reg_offset(s, a->rn)
2513 + (a->h ? vsz / 2 : 0),
2514 vsz, vsz, 0, fns[a->esz][a->u]);
2515 }
2516 return true;
2517}
2518
d731d8cb
RH
2519/*
2520 *** SVE Permute - Predicates Group
2521 */
2522
2523static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2524 gen_helper_gvec_3 *fn)
2525{
2526 if (!sve_access_check(s)) {
2527 return true;
2528 }
2529
2530 unsigned vsz = pred_full_reg_size(s);
2531
d731d8cb
RH
2532 TCGv_ptr t_d = tcg_temp_new_ptr();
2533 TCGv_ptr t_n = tcg_temp_new_ptr();
2534 TCGv_ptr t_m = tcg_temp_new_ptr();
2535 TCGv_i32 t_desc;
f9b0fcce 2536 uint32_t desc = 0;
d731d8cb 2537
f9b0fcce
RH
2538 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2539 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2540 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2541
2542 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2543 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2544 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2545 t_desc = tcg_const_i32(desc);
2546
2547 fn(t_d, t_n, t_m, t_desc);
2548
2549 tcg_temp_free_ptr(t_d);
2550 tcg_temp_free_ptr(t_n);
2551 tcg_temp_free_ptr(t_m);
2552 tcg_temp_free_i32(t_desc);
2553 return true;
2554}
2555
2556static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2557 gen_helper_gvec_2 *fn)
2558{
2559 if (!sve_access_check(s)) {
2560 return true;
2561 }
2562
2563 unsigned vsz = pred_full_reg_size(s);
2564 TCGv_ptr t_d = tcg_temp_new_ptr();
2565 TCGv_ptr t_n = tcg_temp_new_ptr();
2566 TCGv_i32 t_desc;
70acaafe 2567 uint32_t desc = 0;
d731d8cb
RH
2568
2569 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2570 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2571
70acaafe
RH
2572 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2573 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2574 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2575 t_desc = tcg_const_i32(desc);
2576
2577 fn(t_d, t_n, t_desc);
2578
2579 tcg_temp_free_i32(t_desc);
2580 tcg_temp_free_ptr(t_d);
2581 tcg_temp_free_ptr(t_n);
2582 return true;
2583}
2584
3a7be554 2585static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2586{
2587 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2588}
2589
3a7be554 2590static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2591{
2592 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2593}
2594
3a7be554 2595static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2596{
2597 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2598}
2599
3a7be554 2600static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2601{
2602 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2603}
2604
3a7be554 2605static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2606{
2607 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2608}
2609
3a7be554 2610static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2611{
2612 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2613}
2614
3a7be554 2615static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
d731d8cb
RH
2616{
2617 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2618}
2619
3a7be554 2620static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
d731d8cb
RH
2621{
2622 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2623}
2624
3a7be554 2625static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
d731d8cb
RH
2626{
2627 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2628}
2629
234b48e9
RH
2630/*
2631 *** SVE Permute - Interleaving Group
2632 */
2633
2634static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2635{
2636 static gen_helper_gvec_3 * const fns[4] = {
2637 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2638 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2639 };
2640
2641 if (sve_access_check(s)) {
2642 unsigned vsz = vec_full_reg_size(s);
2643 unsigned high_ofs = high ? vsz / 2 : 0;
2644 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2645 vec_full_reg_offset(s, a->rn) + high_ofs,
2646 vec_full_reg_offset(s, a->rm) + high_ofs,
2647 vsz, vsz, 0, fns[a->esz]);
2648 }
2649 return true;
2650}
2651
2652static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2653 gen_helper_gvec_3 *fn)
2654{
2655 if (sve_access_check(s)) {
e645d1a1 2656 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
234b48e9
RH
2657 }
2658 return true;
2659}
2660
3a7be554 2661static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2662{
2663 return do_zip(s, a, false);
2664}
2665
3a7be554 2666static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2667{
2668 return do_zip(s, a, true);
2669}
2670
74b64b25
RH
2671static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
2672{
2673 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2674 return false;
2675 }
2676 if (sve_access_check(s)) {
2677 unsigned vsz = vec_full_reg_size(s);
2678 unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
2679 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2680 vec_full_reg_offset(s, a->rn) + high_ofs,
2681 vec_full_reg_offset(s, a->rm) + high_ofs,
2682 vsz, vsz, 0, gen_helper_sve2_zip_q);
2683 }
2684 return true;
2685}
2686
2687static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
2688{
2689 return do_zip_q(s, a, false);
2690}
2691
2692static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
2693{
2694 return do_zip_q(s, a, true);
2695}
2696
234b48e9
RH
2697static gen_helper_gvec_3 * const uzp_fns[4] = {
2698 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2699 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2700};
2701
3a7be554 2702static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2703{
2704 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2705}
2706
3a7be554 2707static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2708{
2709 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2710}
2711
74b64b25
RH
2712static bool trans_UZP1_q(DisasContext *s, arg_rrr_esz *a)
2713{
2714 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2715 return false;
2716 }
2717 return do_zzz_data_ool(s, a, 0, gen_helper_sve2_uzp_q);
2718}
2719
2720static bool trans_UZP2_q(DisasContext *s, arg_rrr_esz *a)
2721{
2722 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2723 return false;
2724 }
2725 return do_zzz_data_ool(s, a, 16, gen_helper_sve2_uzp_q);
2726}
2727
234b48e9
RH
2728static gen_helper_gvec_3 * const trn_fns[4] = {
2729 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2730 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2731};
2732
3a7be554 2733static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2734{
2735 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2736}
2737
3a7be554 2738static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2739{
2740 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2741}
2742
74b64b25
RH
2743static bool trans_TRN1_q(DisasContext *s, arg_rrr_esz *a)
2744{
2745 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2746 return false;
2747 }
2748 return do_zzz_data_ool(s, a, 0, gen_helper_sve2_trn_q);
2749}
2750
2751static bool trans_TRN2_q(DisasContext *s, arg_rrr_esz *a)
2752{
2753 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2754 return false;
2755 }
2756 return do_zzz_data_ool(s, a, 16, gen_helper_sve2_trn_q);
2757}
2758
3ca879ae
RH
2759/*
2760 *** SVE Permute Vector - Predicated Group
2761 */
2762
3a7be554 2763static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
3ca879ae
RH
2764{
2765 static gen_helper_gvec_3 * const fns[4] = {
2766 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2767 };
2768 return do_zpz_ool(s, a, fns[a->esz]);
2769}
2770
ef23cb72
RH
2771/* Call the helper that computes the ARM LastActiveElement pseudocode
2772 * function, scaled by the element size. This includes the not found
2773 * indication; e.g. not found for esz=3 is -8.
2774 */
2775static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2776{
2777 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2778 * round up, as we do elsewhere, because we need the exact size.
2779 */
2780 TCGv_ptr t_p = tcg_temp_new_ptr();
2781 TCGv_i32 t_desc;
2acbfbe4 2782 unsigned desc = 0;
ef23cb72 2783
2acbfbe4
RH
2784 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2785 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
ef23cb72
RH
2786
2787 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2788 t_desc = tcg_const_i32(desc);
2789
2790 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2791
2792 tcg_temp_free_i32(t_desc);
2793 tcg_temp_free_ptr(t_p);
2794}
2795
2796/* Increment LAST to the offset of the next element in the vector,
2797 * wrapping around to 0.
2798 */
2799static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2800{
2801 unsigned vsz = vec_full_reg_size(s);
2802
2803 tcg_gen_addi_i32(last, last, 1 << esz);
2804 if (is_power_of_2(vsz)) {
2805 tcg_gen_andi_i32(last, last, vsz - 1);
2806 } else {
2807 TCGv_i32 max = tcg_const_i32(vsz);
2808 TCGv_i32 zero = tcg_const_i32(0);
2809 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2810 tcg_temp_free_i32(max);
2811 tcg_temp_free_i32(zero);
2812 }
2813}
2814
2815/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2816static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2817{
2818 unsigned vsz = vec_full_reg_size(s);
2819
2820 if (is_power_of_2(vsz)) {
2821 tcg_gen_andi_i32(last, last, vsz - 1);
2822 } else {
2823 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2824 TCGv_i32 zero = tcg_const_i32(0);
2825 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2826 tcg_temp_free_i32(max);
2827 tcg_temp_free_i32(zero);
2828 }
2829}
2830
2831/* Load an unsigned element of ESZ from BASE+OFS. */
2832static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2833{
2834 TCGv_i64 r = tcg_temp_new_i64();
2835
2836 switch (esz) {
2837 case 0:
2838 tcg_gen_ld8u_i64(r, base, ofs);
2839 break;
2840 case 1:
2841 tcg_gen_ld16u_i64(r, base, ofs);
2842 break;
2843 case 2:
2844 tcg_gen_ld32u_i64(r, base, ofs);
2845 break;
2846 case 3:
2847 tcg_gen_ld_i64(r, base, ofs);
2848 break;
2849 default:
2850 g_assert_not_reached();
2851 }
2852 return r;
2853}
2854
/* Load an unsigned element of ESZ from RM[LAST].
 * LAST is a byte offset into the vector; the caller guarantees it is
 * within [0, vsz).  Note that LAST is clobbered on big-endian hosts.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2880
/* Compute CLAST for a Zreg.
 * Broadcast the last active element of Zm (or, for CLASTA, the one
 * after it) to all elements of Zd.  If no element is active, Zd is
 * simply a copy of Zn (the MOVPRFX value).
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* A local temp: the value must survive the branch below. */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        /* CLASTA: take the element after the last active one. */
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        /* No active element: Zd = Zn. */
        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2928
3a7be554 2929static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2930{
2931 return do_clast_vector(s, a, false);
2932}
2933
3a7be554 2934static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2935{
2936 return do_clast_vector(s, a, true);
2937}
2938
2939/* Compute CLAST for a scalar. */
2940static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2941 bool before, TCGv_i64 reg_val)
2942{
2943 TCGv_i32 last = tcg_temp_new_i32();
2944 TCGv_i64 ele, cmp, zero;
2945
2946 find_last_active(s, last, esz, pg);
2947
2948 /* Extend the original value of last prior to incrementing. */
2949 cmp = tcg_temp_new_i64();
2950 tcg_gen_ext_i32_i64(cmp, last);
2951
2952 if (!before) {
2953 incr_last_active(s, last, esz);
2954 }
2955
2956 /* The conceit here is that while last < 0 indicates not found, after
2957 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2958 * from which we can load garbage. We then discard the garbage with
2959 * a conditional move.
2960 */
2961 ele = load_last_active(s, last, rm, esz);
2962 tcg_temp_free_i32(last);
2963
2964 zero = tcg_const_i64(0);
2965 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2966
2967 tcg_temp_free_i64(zero);
2968 tcg_temp_free_i64(cmp);
2969 tcg_temp_free_i64(ele);
2970}
2971
2972/* Compute CLAST for a Vreg. */
2973static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2974{
2975 if (sve_access_check(s)) {
2976 int esz = a->esz;
2977 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2978 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2979
2980 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2981 write_fp_dreg(s, a->rd, reg);
2982 tcg_temp_free_i64(reg);
2983 }
2984 return true;
2985}
2986
3a7be554 2987static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2988{
2989 return do_clast_fp(s, a, false);
2990}
2991
3a7be554 2992static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2993{
2994 return do_clast_fp(s, a, true);
2995}
2996
2997/* Compute CLAST for a Xreg. */
2998static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2999{
3000 TCGv_i64 reg;
3001
3002 if (!sve_access_check(s)) {
3003 return true;
3004 }
3005
3006 reg = cpu_reg(s, a->rd);
3007 switch (a->esz) {
3008 case 0:
3009 tcg_gen_ext8u_i64(reg, reg);
3010 break;
3011 case 1:
3012 tcg_gen_ext16u_i64(reg, reg);
3013 break;
3014 case 2:
3015 tcg_gen_ext32u_i64(reg, reg);
3016 break;
3017 case 3:
3018 break;
3019 default:
3020 g_assert_not_reached();
3021 }
3022
3023 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
3024 return true;
3025}
3026
3a7be554 3027static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3028{
3029 return do_clast_general(s, a, false);
3030}
3031
3a7be554 3032static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3033{
3034 return do_clast_general(s, a, true);
3035}
3036
3037/* Compute LAST for a scalar. */
3038static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
3039 int pg, int rm, bool before)
3040{
3041 TCGv_i32 last = tcg_temp_new_i32();
3042 TCGv_i64 ret;
3043
3044 find_last_active(s, last, esz, pg);
3045 if (before) {
3046 wrap_last_active(s, last, esz);
3047 } else {
3048 incr_last_active(s, last, esz);
3049 }
3050
3051 ret = load_last_active(s, last, rm, esz);
3052 tcg_temp_free_i32(last);
3053 return ret;
3054}
3055
3056/* Compute LAST for a Vreg. */
3057static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
3058{
3059 if (sve_access_check(s)) {
3060 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
3061 write_fp_dreg(s, a->rd, val);
3062 tcg_temp_free_i64(val);
3063 }
3064 return true;
3065}
3066
3a7be554 3067static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3068{
3069 return do_last_fp(s, a, false);
3070}
3071
3a7be554 3072static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3073{
3074 return do_last_fp(s, a, true);
3075}
3076
3077/* Compute LAST for a Xreg. */
3078static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
3079{
3080 if (sve_access_check(s)) {
3081 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
3082 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
3083 tcg_temp_free_i64(val);
3084 }
3085 return true;
3086}
3087
3a7be554 3088static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3089{
3090 return do_last_general(s, a, false);
3091}
3092
3a7be554 3093static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3094{
3095 return do_last_general(s, a, true);
3096}
3097
3a7be554 3098static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
3099{
3100 if (sve_access_check(s)) {
3101 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
3102 }
3103 return true;
3104}
3105
3a7be554 3106static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
3107{
3108 if (sve_access_check(s)) {
3109 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
3110 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
3111 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
3112 tcg_temp_free_i64(t);
3113 }
3114 return true;
3115}
3116
3a7be554 3117static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3118{
3119 static gen_helper_gvec_3 * const fns[4] = {
3120 NULL,
3121 gen_helper_sve_revb_h,
3122 gen_helper_sve_revb_s,
3123 gen_helper_sve_revb_d,
3124 };
3125 return do_zpz_ool(s, a, fns[a->esz]);
3126}
3127
3a7be554 3128static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3129{
3130 static gen_helper_gvec_3 * const fns[4] = {
3131 NULL,
3132 NULL,
3133 gen_helper_sve_revh_s,
3134 gen_helper_sve_revh_d,
3135 };
3136 return do_zpz_ool(s, a, fns[a->esz]);
3137}
3138
3a7be554 3139static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3140{
3141 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
3142}
3143
3a7be554 3144static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3145{
3146 static gen_helper_gvec_3 * const fns[4] = {
3147 gen_helper_sve_rbit_b,
3148 gen_helper_sve_rbit_h,
3149 gen_helper_sve_rbit_s,
3150 gen_helper_sve_rbit_d,
3151 };
3152 return do_zpz_ool(s, a, fns[a->esz]);
3153}
3154
3a7be554 3155static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
b48ff240
RH
3156{
3157 if (sve_access_check(s)) {
36cbb7a8 3158 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
dd701faf 3159 a->rd, a->rn, a->rm, a->pg, a->esz);
b48ff240
RH
3160 }
3161 return true;
3162}
3163
75114792
SL
3164static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
3165{
3166 if (!dc_isar_feature(aa64_sve2, s)) {
3167 return false;
3168 }
3169 if (sve_access_check(s)) {
3170 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
3171 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
3172 }
3173 return true;
3174}
3175
757f9cff
RH
3176/*
3177 *** SVE Integer Compare - Vectors Group
3178 */
3179
3180static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
3181 gen_helper_gvec_flags_4 *gen_fn)
3182{
3183 TCGv_ptr pd, zn, zm, pg;
3184 unsigned vsz;
3185 TCGv_i32 t;
3186
3187 if (gen_fn == NULL) {
3188 return false;
3189 }
3190 if (!sve_access_check(s)) {
3191 return true;
3192 }
3193
3194 vsz = vec_full_reg_size(s);
3195 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
3196 pd = tcg_temp_new_ptr();
3197 zn = tcg_temp_new_ptr();
3198 zm = tcg_temp_new_ptr();
3199 pg = tcg_temp_new_ptr();
3200
3201 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
3202 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
3203 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
3204 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3205
3206 gen_fn(t, pd, zn, zm, pg, t);
3207
3208 tcg_temp_free_ptr(pd);
3209 tcg_temp_free_ptr(zn);
3210 tcg_temp_free_ptr(zm);
3211 tcg_temp_free_ptr(pg);
3212
3213 do_pred_flags(t);
3214
3215 tcg_temp_free_i32(t);
3216 return true;
3217}
3218
/*
 * Generate the trans_*_ppzz entry points for the integer compare
 * (vectors) instructions; the flags-setting helper is selected by
 * element size.
 */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/*
 * As above, but comparing against a 64-bit wide second operand;
 * no doubleword form exists (NULL entry rejects esz == 3).
 */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
3260
38cadeba
RH
3261/*
3262 *** SVE Integer Compare - Immediate Groups
3263 */
3264
3265static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
3266 gen_helper_gvec_flags_3 *gen_fn)
3267{
3268 TCGv_ptr pd, zn, pg;
3269 unsigned vsz;
3270 TCGv_i32 t;
3271
3272 if (gen_fn == NULL) {
3273 return false;
3274 }
3275 if (!sve_access_check(s)) {
3276 return true;
3277 }
3278
3279 vsz = vec_full_reg_size(s);
3280 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
3281 pd = tcg_temp_new_ptr();
3282 zn = tcg_temp_new_ptr();
3283 pg = tcg_temp_new_ptr();
3284
3285 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
3286 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
3287 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3288
3289 gen_fn(t, pd, zn, pg, t);
3290
3291 tcg_temp_free_ptr(pd);
3292 tcg_temp_free_ptr(zn);
3293 tcg_temp_free_ptr(pg);
3294
3295 do_pred_flags(t);
3296
3297 tcg_temp_free_i32(t);
3298 return true;
3299}
3300
3301#define DO_PPZI(NAME, name) \
3a7be554 3302static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
38cadeba
RH
3303{ \
3304 static gen_helper_gvec_flags_3 * const fns[4] = { \
3305 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
3306 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
3307 }; \
3308 return do_ppzi_flags(s, a, fns[a->esz]); \
3309}
3310
3311DO_PPZI(CMPEQ, cmpeq)
3312DO_PPZI(CMPNE, cmpne)
3313DO_PPZI(CMPGT, cmpgt)
3314DO_PPZI(CMPGE, cmpge)
3315DO_PPZI(CMPHI, cmphi)
3316DO_PPZI(CMPHS, cmphs)
3317DO_PPZI(CMPLT, cmplt)
3318DO_PPZI(CMPLE, cmple)
3319DO_PPZI(CMPLO, cmplo)
3320DO_PPZI(CMPLS, cmpls)
3321
3322#undef DO_PPZI
3323
35da316f
RH
3324/*
3325 *** SVE Partition Break Group
3326 */
3327
3328static bool do_brk3(DisasContext *s, arg_rprr_s *a,
3329 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
3330{
3331 if (!sve_access_check(s)) {
3332 return true;
3333 }
3334
3335 unsigned vsz = pred_full_reg_size(s);
3336
3337 /* Predicate sizes may be smaller and cannot use simd_desc. */
3338 TCGv_ptr d = tcg_temp_new_ptr();
3339 TCGv_ptr n = tcg_temp_new_ptr();
3340 TCGv_ptr m = tcg_temp_new_ptr();
3341 TCGv_ptr g = tcg_temp_new_ptr();
04c774a2 3342 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3343
3344 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3345 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3346 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
3347 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3348
3349 if (a->s) {
3350 fn_s(t, d, n, m, g, t);
3351 do_pred_flags(t);
3352 } else {
3353 fn(d, n, m, g, t);
3354 }
3355 tcg_temp_free_ptr(d);
3356 tcg_temp_free_ptr(n);
3357 tcg_temp_free_ptr(m);
3358 tcg_temp_free_ptr(g);
3359 tcg_temp_free_i32(t);
3360 return true;
3361}
3362
3363static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3364 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3365{
3366 if (!sve_access_check(s)) {
3367 return true;
3368 }
3369
3370 unsigned vsz = pred_full_reg_size(s);
3371
3372 /* Predicate sizes may be smaller and cannot use simd_desc. */
3373 TCGv_ptr d = tcg_temp_new_ptr();
3374 TCGv_ptr n = tcg_temp_new_ptr();
3375 TCGv_ptr g = tcg_temp_new_ptr();
04c774a2 3376 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3377
3378 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3379 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3380 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3381
3382 if (a->s) {
3383 fn_s(t, d, n, g, t);
3384 do_pred_flags(t);
3385 } else {
3386 fn(d, n, g, t);
3387 }
3388 tcg_temp_free_ptr(d);
3389 tcg_temp_free_ptr(n);
3390 tcg_temp_free_ptr(g);
3391 tcg_temp_free_i32(t);
3392 return true;
3393}
3394
/* BRKPA: break after first active-true element, propagating. */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

/* BRKPB: break before first active-true element, propagating. */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

/* BRKA, merging with the destination for inactive elements. */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

/* BRKB, merging with the destination for inactive elements. */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

/* BRKA, zeroing inactive elements. */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

/* BRKB, zeroing inactive elements. */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

/* BRKN: propagate break to next partition. */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3429
9ee3a611
RH
3430/*
3431 *** SVE Predicate Count Group
3432 */
3433
/*
 * Count the active elements of element size ESZ in predicate PN,
 * gated by predicate PG, depositing the count into VAL.
 *
 * When the whole predicate fits in 64 bits, do it inline with a
 * masked popcount; otherwise call the out-of-line helper.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            /* Gate PN by PG; skipped when they are the same register. */
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;
        TCGv_i32 t_desc;

        /* Predicate sizes cannot use simd_desc; build a PREDDESC. */
        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
3475
/* CNTP: write the count of active predicate elements to Xd. */
static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
{
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}
3483
3a7be554 3484static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3485{
3486 if (sve_access_check(s)) {
3487 TCGv_i64 reg = cpu_reg(s, a->rd);
3488 TCGv_i64 val = tcg_temp_new_i64();
3489
3490 do_cntp(s, val, a->esz, a->pg, a->pg);
3491 if (a->d) {
3492 tcg_gen_sub_i64(reg, reg, val);
3493 } else {
3494 tcg_gen_add_i64(reg, reg, val);
3495 }
3496 tcg_temp_free_i64(val);
3497 }
3498 return true;
3499}
3500
3a7be554 3501static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3502{
3503 if (a->esz == 0) {
3504 return false;
3505 }
3506 if (sve_access_check(s)) {
3507 unsigned vsz = vec_full_reg_size(s);
3508 TCGv_i64 val = tcg_temp_new_i64();
3509 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3510
3511 do_cntp(s, val, a->esz, a->pg, a->pg);
3512 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3513 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3514 }
3515 return true;
3516}
3517
/* SQINCP/UQINCP/SQDECP/UQDECP (32-bit scalar): saturating inc/dec
 * of Wd by the active-element count of the governing predicate. */
static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}

/* As above, for the 64-bit scalar form. */
static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}

/* As above, applied element-wise to vector Zd; byte elements invalid. */
static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}
3554
caf1cefc
RH
3555/*
3556 *** SVE Integer Compare Scalars Group
3557 */
3558
3a7be554 3559static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3560{
3561 if (!sve_access_check(s)) {
3562 return true;
3563 }
3564
3565 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3566 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3567 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3568 TCGv_i64 cmp = tcg_temp_new_i64();
3569
3570 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3571 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3572 tcg_temp_free_i64(cmp);
3573
3574 /* VF = !NF & !CF. */
3575 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3576 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3577
3578 /* Both NF and VF actually look at bit 31. */
3579 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3580 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3581 return true;
3582}
3583
/*
 * WHILE{LT,LE,LO,LS,GT,GE,HI,HS}: construct a predicate from a
 * scalar loop-bound comparison.  All conditions are compressed into
 * a single "number of true iterations" value that is handed to the
 * whilel/whileg helper.  The greater-than family is SVE2-only.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit operands: widen with the signedness of the compare. */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_const_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length. This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, t3);
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, t3);
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
3697
14f6dad1
RH
3698static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3699{
3700 TCGv_i64 op0, op1, diff, t1, tmax;
3701 TCGv_i32 t2, t3;
3702 TCGv_ptr ptr;
3703 unsigned vsz = vec_full_reg_size(s);
3704 unsigned desc = 0;
3705
3706 if (!dc_isar_feature(aa64_sve2, s)) {
3707 return false;
3708 }
3709 if (!sve_access_check(s)) {
3710 return true;
3711 }
3712
3713 op0 = read_cpu_reg(s, a->rn, 1);
3714 op1 = read_cpu_reg(s, a->rm, 1);
3715
3716 tmax = tcg_const_i64(vsz);
3717 diff = tcg_temp_new_i64();
3718
3719 if (a->rw) {
3720 /* WHILERW */
3721 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3722 t1 = tcg_temp_new_i64();
3723 tcg_gen_sub_i64(diff, op0, op1);
3724 tcg_gen_sub_i64(t1, op1, op0);
3725 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3726 tcg_temp_free_i64(t1);
3727 /* Round down to a multiple of ESIZE. */
3728 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3729 /* If op1 == op0, diff == 0, and the condition is always true. */
3730 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3731 } else {
3732 /* WHILEWR */
3733 tcg_gen_sub_i64(diff, op1, op0);
3734 /* Round down to a multiple of ESIZE. */
3735 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3736 /* If op0 >= op1, diff <= 0, the condition is always true. */
3737 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3738 }
3739
3740 /* Bound to the maximum. */
3741 tcg_gen_umin_i64(diff, diff, tmax);
3742 tcg_temp_free_i64(tmax);
3743
3744 /* Since we're bounded, pass as a 32-bit type. */
3745 t2 = tcg_temp_new_i32();
3746 tcg_gen_extrl_i64_i32(t2, diff);
3747 tcg_temp_free_i64(diff);
3748
3749 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3750 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3751 t3 = tcg_const_i32(desc);
3752
3753 ptr = tcg_temp_new_ptr();
3754 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3755
3756 gen_helper_sve_whilel(t2, ptr, t2, t3);
3757 do_pred_flags(t2);
3758
3759 tcg_temp_free_ptr(ptr);
3760 tcg_temp_free_i32(t2);
3761 tcg_temp_free_i32(t3);
3762 return true;
3763}
3764
ed491961
RH
3765/*
3766 *** SVE Integer Wide Immediate - Unpredicated Group
3767 */
3768
/* FDUP: broadcast a VFP-encoded floating-point immediate to Zd.
 * Byte elements have no FP encoding and are invalid. */
static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate. */
        imm = vfp_expand_imm(a->esz, a->imm);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
    }
    return true;
}
3785
/* DUP (immediate): broadcast an integer immediate to Zd.
 * Insn bit 13 (shifted immediate) is invalid with byte elements. */
static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);

        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
    }
    return true;
}
3799
3a7be554 3800static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3801{
3a7be554 3802 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3803 return false;
3804 }
3805 if (sve_access_check(s)) {
3806 unsigned vsz = vec_full_reg_size(s);
3807 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3808 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3809 }
3810 return true;
3811}
3812
/* SUB (vector, immediate): implemented as ADD of the negated immediate. */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3818
/*
 * SUBR (vector, immediate): reversed subtract, Zd = imm - Zn,
 * expanded with scalar_first so the immediate is the first operand.
 * Insn bit 13 (shifted immediate) is invalid with byte elements.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
3863
3a7be554 3864static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3865{
3866 if (sve_access_check(s)) {
3867 unsigned vsz = vec_full_reg_size(s);
3868 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3869 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3870 }
3871 return true;
3872}
3873
/* Expand a saturating add/subtract of an immediate (u: unsigned,
 * d: subtract).  Insn bit 13 is invalid with byte elements. */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_const_i64(a->imm);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
        tcg_temp_free_i64(val);
    }
    return true;
}
3886
/* SQADD (immediate): signed saturating add. */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}

/* UQADD (immediate): unsigned saturating add. */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}

/* SQSUB (immediate): signed saturating subtract. */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}

/* UQSUB (immediate): unsigned saturating subtract. */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
3906
3907static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3908{
3909 if (sve_access_check(s)) {
3910 unsigned vsz = vec_full_reg_size(s);
3911 TCGv_i64 c = tcg_const_i64(a->imm);
3912
3913 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3914 vec_full_reg_offset(s, a->rn),
3915 c, vsz, vsz, 0, fn);
3916 tcg_temp_free_i64(c);
3917 }
3918 return true;
3919}
3920
/* Expand min/max against an immediate, dispatching on element size. */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3937
/* SDOT/UDOT (vectors): dispatch on signedness (a->u) and
 * source element size (a->sz). */
static bool trans_DOT_zzzz(DisasContext *s, arg_DOT_zzzz *a)
{
    static gen_helper_gvec_4 * const fns[2][2] = {
        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
3950
814d4c52
RH
3951/*
3952 * SVE Multiply - Indexed
3953 */
3954
0a82d963
RH
3955static bool do_zzxz_ool(DisasContext *s, arg_rrxr_esz *a,
3956 gen_helper_gvec_4 *fn)
16fcfdc7 3957{
0a82d963
RH
3958 if (fn == NULL) {
3959 return false;
3960 }
16fcfdc7 3961 if (sve_access_check(s)) {
0a82d963 3962 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
16fcfdc7
RH
3963 }
3964 return true;
3965}
3966
0a82d963
RH
/* Indexed dot products. */
#define DO_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
    { return do_zzxz_ool(s, a, FUNC); }

DO_RRXR(trans_SDOT_zzxw_s, gen_helper_gvec_sdot_idx_b)
DO_RRXR(trans_SDOT_zzxw_d, gen_helper_gvec_sdot_idx_h)
DO_RRXR(trans_UDOT_zzxw_s, gen_helper_gvec_udot_idx_b)
DO_RRXR(trans_UDOT_zzxw_d, gen_helper_gvec_udot_idx_h)

/* SUDOT (indexed): requires the I8MM extension. */
static bool trans_SUDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    return do_zzxz_ool(s, a, gen_helper_gvec_sudot_idx_b);
}

/* USDOT (indexed): requires the I8MM extension. */
static bool trans_USDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    return do_zzxz_ool(s, a, gen_helper_gvec_usdot_idx_b);
}

#undef DO_RRXR
16fcfdc7 3993
814d4c52
RH
3994static bool do_sve2_zzz_data(DisasContext *s, int rd, int rn, int rm, int data,
3995 gen_helper_gvec_3 *fn)
3996{
3997 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
3998 return false;
3999 }
4000 if (sve_access_check(s)) {
4001 unsigned vsz = vec_full_reg_size(s);
4002 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
4003 vec_full_reg_offset(s, rn),
4004 vec_full_reg_offset(s, rm),
4005 vsz, vsz, data, fn);
4006 }
4007 return true;
4008}
4009
/* SVE2 indexed multiplies: the index is passed as descriptor data. */
#define DO_SVE2_RRX(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a)  \
    { return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, a->index, FUNC); }

DO_SVE2_RRX(trans_MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(trans_MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(trans_MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(trans_SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(trans_SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(trans_SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(trans_SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(trans_SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(trans_SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX
4027
b95f5eeb
RH
/* SVE2 indexed widening multiplies: pack the top/bottom selector
 * into bit 0 of the descriptor data, index above it. */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a)           \
    {                                                           \
        return do_sve2_zzz_data(s, a->rd, a->rn, a->rm,         \
                                (a->index << 1) | TOP, FUNC);   \
    }

DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(trans_SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(trans_SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(trans_UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(trans_UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
4051
8a02aac7
RH
4052static bool do_sve2_zzzz_data(DisasContext *s, int rd, int rn, int rm, int ra,
4053 int data, gen_helper_gvec_4 *fn)
4054{
4055 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
4056 return false;
4057 }
4058 if (sve_access_check(s)) {
4059 unsigned vsz = vec_full_reg_size(s);
4060 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
4061 vec_full_reg_offset(s, rn),
4062 vec_full_reg_offset(s, rm),
4063 vec_full_reg_offset(s, ra),
4064 vsz, vsz, data, fn);
4065 }
4066 return true;
4067}
4068
/* SVE2 indexed multiply-accumulate: index as descriptor data. */
#define DO_SVE2_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
    { return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, a->index, FUNC); }

DO_SVE2_RRXR(trans_MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(trans_MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(trans_MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(trans_MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(trans_MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(trans_MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
4090
c5c455d7
RH
/* SVE2 indexed widening multiply-accumulate: top/bottom selector in
 * bit 0 of the descriptor data, index above it.  Note the accumulator
 * is a->rd (destructive form). */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)          \
    {                                                           \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->rd, \
                                 (a->index << 1) | TOP, FUNC);  \
    }

DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
4129
3b787ed8
RH
/* SVE2 indexed complex multiply-accumulate: rotation in the low two
 * bits of the descriptor data, index above it. */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    static bool trans_##NAME(DisasContext *s, arg_##NAME *a)       \
    {                                                              \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra,    \
                                 (a->index << 2) | a->rot, FUNC);  \
    }

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
4147
ca40a6e6
RH
4148/*
4149 *** SVE Floating Point Multiply-Add Indexed Group
4150 */
4151
/*
 * Expand FMLA/FMLS (indexed): sub selects subtraction and is packed
 * into bit 0 of the descriptor data, index above it.  Half-precision
 * uses the FP16 status pointer.
 */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4173
0a82d963
RH
/* FMLA (indexed). */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, false);
}

/* FMLS (indexed). */
static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, true);
}
4183
ca40a6e6
RH
4184/*
4185 *** SVE Floating Point Multiply Indexed Group
4186 */
4187
/* FMUL (indexed): the index is passed as descriptor data;
 * half-precision uses the FP16 status pointer. */
static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4207
23fbe79f
RH
4208/*
4209 *** SVE Floating Point Fast Reduction Group
4210 */
4211
4212typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
4213 TCGv_ptr, TCGv_i32);
4214
/*
 * Expand a floating-point horizontal reduction, writing the scalar
 * result to Vd.  The descriptor data carries pow2ceil(vsz), which the
 * helper uses for its pairwise reduction.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);
    tcg_temp_free_i32(t_desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
4241
/* FP horizontal reductions, dispatching on element size.
 * Byte elements have no FP form and are invalid. */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)       \
{                                                               \
    static gen_helper_fp_reduce * const fns[3] = {              \
        gen_helper_sve_##name##_h,                              \
        gen_helper_sve_##name##_s,                              \
        gen_helper_sve_##name##_d,                              \
    };                                                          \
    if (a->esz == 0) {                                          \
        return false;                                           \
    }                                                           \
    if (sve_access_check(s)) {                                  \
        do_reduce(s, a, fns[a->esz - 1]);                       \
    }                                                           \
    return true;                                                \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
4264
3887c038
RH
4265/*
4266 *** SVE Floating Point Unary Operations - Unpredicated Group
4267 */
4268
4269static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
4270{
4271 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4272 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
4273
4274 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4275 vec_full_reg_offset(s, a->rn),
4276 status, vsz, vsz, 0, fn);
4277 tcg_temp_free_ptr(status);
4278}
4279
3a7be554 4280static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
4281{
4282 static gen_helper_gvec_2_ptr * const fns[3] = {
4283 gen_helper_gvec_frecpe_h,
4284 gen_helper_gvec_frecpe_s,
4285 gen_helper_gvec_frecpe_d,
4286 };
4287 if (a->esz == 0) {
4288 return false;
4289 }
4290 if (sve_access_check(s)) {
4291 do_zz_fp(s, a, fns[a->esz - 1]);
4292 }
4293 return true;
4294}
4295
3a7be554 4296static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
4297{
4298 static gen_helper_gvec_2_ptr * const fns[3] = {
4299 gen_helper_gvec_frsqrte_h,
4300 gen_helper_gvec_frsqrte_s,
4301 gen_helper_gvec_frsqrte_d,
4302 };
4303 if (a->esz == 0) {
4304 return false;
4305 }
4306 if (sve_access_check(s)) {
4307 do_zz_fp(s, a, fns[a->esz - 1]);
4308 }
4309 return true;
4310}
4311
4d2e2a03
RH
4312/*
4313 *** SVE Floating Point Compare with Zero Group
4314 */
4315
4316static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4317 gen_helper_gvec_3_ptr *fn)
4318{
4319 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4320 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
4321
4322 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
4323 vec_full_reg_offset(s, a->rn),
4324 pred_full_reg_offset(s, a->pg),
4325 status, vsz, vsz, 0, fn);
4326 tcg_temp_free_ptr(status);
4327}
4328
/*
 * Expand the FP compare-with-zero insns via do_ppz_fp.
 * esz == 0 is invalid for FP; fns[] is indexed by esz - 1 (h, s, d).
 */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
4354
67fcd9ad
RH
4355/*
4356 *** SVE floating-point trig multiply-add coefficient
4357 */
4358
3a7be554 4359static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
4360{
4361 static gen_helper_gvec_3_ptr * const fns[3] = {
4362 gen_helper_sve_ftmad_h,
4363 gen_helper_sve_ftmad_s,
4364 gen_helper_sve_ftmad_d,
4365 };
4366
4367 if (a->esz == 0) {
4368 return false;
4369 }
4370 if (sve_access_check(s)) {
4371 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4372 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
4373 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4374 vec_full_reg_offset(s, a->rn),
4375 vec_full_reg_offset(s, a->rm),
4376 status, vsz, vsz, a->imm, fns[a->esz - 1]);
4377 tcg_temp_free_ptr(status);
4378 }
4379 return true;
4380}
4381
7f9ddf64
RH
4382/*
4383 *** SVE Floating Point Accumulating Reduction Group
4384 */
4385
3a7be554 4386static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
4387{
4388 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
4389 TCGv_ptr, TCGv_ptr, TCGv_i32);
4390 static fadda_fn * const fns[3] = {
4391 gen_helper_sve_fadda_h,
4392 gen_helper_sve_fadda_s,
4393 gen_helper_sve_fadda_d,
4394 };
4395 unsigned vsz = vec_full_reg_size(s);
4396 TCGv_ptr t_rm, t_pg, t_fpst;
4397 TCGv_i64 t_val;
4398 TCGv_i32 t_desc;
4399
4400 if (a->esz == 0) {
4401 return false;
4402 }
4403 if (!sve_access_check(s)) {
4404 return true;
4405 }
4406
4407 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
4408 t_rm = tcg_temp_new_ptr();
4409 t_pg = tcg_temp_new_ptr();
4410 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
4411 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 4412 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7f9ddf64
RH
4413 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
4414
4415 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
4416
4417 tcg_temp_free_i32(t_desc);
4418 tcg_temp_free_ptr(t_fpst);
4419 tcg_temp_free_ptr(t_pg);
4420 tcg_temp_free_ptr(t_rm);
4421
4422 write_fp_dreg(s, a->rd, t_val);
4423 tcg_temp_free_i64(t_val);
4424 return true;
4425}
4426
29b80469
RH
4427/*
4428 *** SVE Floating Point Arithmetic - Unpredicated Group
4429 */
4430
4431static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
4432 gen_helper_gvec_3_ptr *fn)
4433{
4434 if (fn == NULL) {
4435 return false;
4436 }
4437 if (sve_access_check(s)) {
4438 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4439 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
4440 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4441 vec_full_reg_offset(s, a->rn),
4442 vec_full_reg_offset(s, a->rm),
4443 status, vsz, vsz, 0, fn);
4444 tcg_temp_free_ptr(status);
4445 }
4446 return true;
4447}
4448
4449
/*
 * Expand the unpredicated FP arithmetic insns via do_zzz_fp.
 * fns[] is indexed directly by esz; the NULL entry for esz == 0
 * makes do_zzz_fp reject byte elements.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
4468
ec3b87c2
RH
4469/*
4470 *** SVE Floating Point Arithmetic - Predicated Group
4471 */
4472
4473static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
4474 gen_helper_gvec_4_ptr *fn)
4475{
4476 if (fn == NULL) {
4477 return false;
4478 }
4479 if (sve_access_check(s)) {
4480 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4481 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
4482 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4483 vec_full_reg_offset(s, a->rn),
4484 vec_full_reg_offset(s, a->rm),
4485 pred_full_reg_offset(s, a->pg),
4486 status, vsz, vsz, 0, fn);
4487 tcg_temp_free_ptr(status);
4488 }
4489 return true;
4490}
4491
/*
 * Expand the predicated FP arithmetic insns via do_zpzz_fp.
 * fns[] is indexed directly by esz; NULL for esz == 0 rejects bytes.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_zpzz_fp(s, a, fns[a->esz]);                           \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
8092c6a3 4515
cc48affe
RH
4516typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
4517 TCGv_i64, TCGv_ptr, TCGv_i32);
4518
4519static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
4520 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
4521{
4522 unsigned vsz = vec_full_reg_size(s);
4523 TCGv_ptr t_zd, t_zn, t_pg, status;
4524 TCGv_i32 desc;
4525
4526 t_zd = tcg_temp_new_ptr();
4527 t_zn = tcg_temp_new_ptr();
4528 t_pg = tcg_temp_new_ptr();
4529 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
4530 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
4531 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4532
cdfb22bb 4533 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
cc48affe
RH
4534 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
4535 fn(t_zd, t_zn, t_pg, scalar, status, desc);
4536
4537 tcg_temp_free_i32(desc);
4538 tcg_temp_free_ptr(status);
4539 tcg_temp_free_ptr(t_pg);
4540 tcg_temp_free_ptr(t_zn);
4541 tcg_temp_free_ptr(t_zd);
4542}
4543
4544static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4545 gen_helper_sve_fp2scalar *fn)
4546{
4547 TCGv_i64 temp = tcg_const_i64(imm);
4548 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
4549 tcg_temp_free_i64(temp);
4550}
4551
/*
 * Expand the FP arithmetic-with-immediate insns.  Each insn encodes a
 * one-bit immediate selecting between two architectural constants
 * (e.g. 0.5/1.0 for FADD); val[][] holds the raw bit patterns per
 * element size, indexed by [esz - 1][a->imm].
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)   \
{                                                                   \
    static gen_helper_sve_fp2scalar * const fns[3] = {              \
        gen_helper_sve_##name##_h,                                  \
        gen_helper_sve_##name##_s,                                  \
        gen_helper_sve_##name##_d                                   \
    };                                                              \
    static uint64_t const val[3][2] = {                             \
        { float16_##const0, float16_##const1 },                     \
        { float32_##const0, float32_##const1 },                     \
        { float64_##const0, float64_##const1 },                     \
    };                                                              \
    if (a->esz == 0) {                                              \
        return false;                                               \
    }                                                               \
    if (sve_access_check(s)) {                                      \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);  \
    }                                                               \
    return true;                                                    \
}

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
4584
abfdefd5
RH
4585static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4586 gen_helper_gvec_4_ptr *fn)
4587{
4588 if (fn == NULL) {
4589 return false;
4590 }
4591 if (sve_access_check(s)) {
4592 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4593 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4594 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4595 vec_full_reg_offset(s, a->rn),
4596 vec_full_reg_offset(s, a->rm),
4597 pred_full_reg_offset(s, a->pg),
4598 status, vsz, vsz, 0, fn);
4599 tcg_temp_free_ptr(status);
4600 }
4601 return true;
4602}
4603
/*
 * Expand the predicated FP compare insns via do_fp_cmp.
 * fns[] is indexed directly by esz; NULL for esz == 0 rejects bytes.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)   \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_fp_cmp(s, a, fns[a->esz]);                            \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
4623
3a7be554 4624static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
4625{
4626 static gen_helper_gvec_4_ptr * const fns[3] = {
4627 gen_helper_sve_fcadd_h,
4628 gen_helper_sve_fcadd_s,
4629 gen_helper_sve_fcadd_d
4630 };
4631
4632 if (a->esz == 0) {
4633 return false;
4634 }
4635 if (sve_access_check(s)) {
4636 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4637 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
4638 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4639 vec_full_reg_offset(s, a->rn),
4640 vec_full_reg_offset(s, a->rm),
4641 pred_full_reg_offset(s, a->pg),
4642 status, vsz, vsz, a->rot, fns[a->esz - 1]);
4643 tcg_temp_free_ptr(status);
4644 }
4645 return true;
4646}
4647
08975da9
RH
4648static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
4649 gen_helper_gvec_5_ptr *fn)
6ceabaad 4650{
08975da9 4651 if (a->esz == 0) {
6ceabaad
RH
4652 return false;
4653 }
08975da9
RH
4654 if (sve_access_check(s)) {
4655 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4656 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4657 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4658 vec_full_reg_offset(s, a->rn),
4659 vec_full_reg_offset(s, a->rm),
4660 vec_full_reg_offset(s, a->ra),
4661 pred_full_reg_offset(s, a->pg),
4662 status, vsz, vsz, 0, fn);
4663 tcg_temp_free_ptr(status);
6ceabaad 4664 }
6ceabaad
RH
4665 return true;
4666}
4667
/*
 * Expand the predicated FP multiply-add insns via do_fmla.
 * fns[] is indexed directly by esz; the NULL entry is unreachable
 * because do_fmla rejects esz == 0 first.
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)         \
{                                                                   \
    static gen_helper_gvec_5_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_fmla(s, a, fns[a->esz]);                              \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4684
/*
 * FCMLA (vectors): predicated complex multiply-accumulate.
 * The rotation selector is passed in the descriptor data field.
 */
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    static gen_helper_gvec_5_ptr * const fns[4] = {
        NULL,
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    /* No byte-sized FP elements. */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4710
/*
 * FCMLA (indexed): complex multiply-accumulate with an indexed element.
 * Only half and single element sizes exist (asserted below); the
 * element index and rotation are packed together into the descriptor
 * data field as index * 4 + rot.
 */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_4_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz,
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4734
8092c6a3
RH
4735/*
4736 *** SVE Floating Point Unary Operations Predicated Group
4737 */
4738
4739static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4740 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4741{
4742 if (sve_access_check(s)) {
4743 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4744 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
4745 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4746 vec_full_reg_offset(s, rn),
4747 pred_full_reg_offset(s, pg),
4748 status, vsz, vsz, 0, fn);
4749 tcg_temp_free_ptr(status);
4750 }
4751 return true;
4752}
4753
/*
 * FP precision conversions.  All of these pass is_fp16 == false,
 * i.e. they run under the standard FPST_FPCR status even when a
 * half-precision operand is involved.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

/* BFCVT requires the SVE BFloat16 extension. */
static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}
4791
/*
 * FP to integer conversions (round toward zero).  Conversions from a
 * half-precision source (_h*) pass is_fp16 == true and so use the
 * FPST_FPCR_F16 status; the rest use FPST_FPCR.
 */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
4861
cda3c753
RH
/*
 * Round-to-integral helpers, indexed by esz - 1 (h, s, d).
 * Shared between FRINTI and the explicit-rounding-mode variants below.
 */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round to integral, using the current FPCR rounding mode. */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}
4876
/*
 * FRINTX: round to integral, raising Inexact; uses the current
 * FPCR rounding mode via a distinct set of helpers.
 */
static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frintx_h,
        gen_helper_sve_frintx_s,
        gen_helper_sve_frintx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
4889
95365277
SL
/*
 * Expand a round-to-integral with an explicit rounding MODE,
 * temporarily overriding the FPCR rounding mode around the operation.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        /*
         * Install the new rounding mode; set_rmode writes the previous
         * mode back into TMODE, so the identical call below restores it.
         * TMODE is written by the helper, so it cannot be a tcg_constant.
         */
        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);

        /* Restore the saved rounding mode. */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4911
/* FRINTN: round to nearest, ties to even. */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
}

/* FRINTP: round toward plus infinity. */
static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
}

/* FRINTM: round toward minus infinity. */
static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
}

/* FRINTZ: round toward zero. */
static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
}

/* FRINTA: round to nearest, ties away from zero. */
static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
}
4951
/* FRECPX: floating-point reciprocal exponent, predicated. */
static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frecpx_h,
        gen_helper_sve_frecpx_s,
        gen_helper_sve_frecpx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}

/* FSQRT: floating-point square root, predicated. */
static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_fsqrt_h,
        gen_helper_sve_fsqrt_s,
        gen_helper_sve_fsqrt_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
4977
/*
 * Integer to FP conversions.  Conversions producing a half-precision
 * result (_*h) pass is_fp16 == true and so use the FPST_FPCR_F16
 * status; the rest use FPST_FPCR.
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
5047
d1822297
RH
5048/*
5049 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
5050 */
5051
5052/* Subroutine loading a vector register at VOFS of LEN bytes.
5053 * The load should begin at the address Rn + IMM.
5054 */
5055
static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte loads plus one extra load for any remainder. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    /* Form Rn + IMM, then apply the MTE/TBI check over the whole LEN. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small vectors: unroll the 8-byte loads. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Larger vectors: emit a TCG-level loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        /* Store to cpu_env at offset vofs + i, advancing i by 8. */
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) maps 2/4/8 bytes to MO_16/32/64. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte load followed by a 2-byte load,
               merged into the low 48 bits of t0. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
5142
5047c204 5143/* Similarly for stores. */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte stores plus one extra store for any remainder. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    /* Form Rn + IMM, then apply the MTE/TBI check over the whole LEN. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities. There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small vectors: unroll the 8-byte stores. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Larger vectors: emit a TCG-level loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        /* Load from cpu_env at offset vofs + i, advancing i by 8. */
        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) maps 2/4/8 bytes to MO_16/32/64. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte store of the low word, then a 2-byte
               store of bits [47:32]. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
5227
/*
 * LDR/STR of whole vector or predicate registers.  The immediate is
 * an index in units of the register size, hence the a->imm * size
 * scaling of the byte offset.
 */
static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}
5267
c4e7c493
RH
5268/*
5269 *** SVE Memory - Contiguous Load Group
5270 */
5271
/*
 * The memory mode of the dtype: memory access size plus sign/zero
 * extension, indexed by the 4-bit dtype field of the load encodings.
 * The matching vector element size is in dtype_esz.
 */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

/* Log2 of the memory access size for a dtype (MO_SIZE portion only). */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
5281
/*
 * The vector element size of dtype, as log2(bytes); used below as
 * "vsz >> dtype_esz[dtype]" to compute the element count.
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
5289
5290static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
5291 int dtype, uint32_t mte_n, bool is_write,
5292 gen_helper_gvec_mem *fn)
c4e7c493
RH
5293{
5294 unsigned vsz = vec_full_reg_size(s);
5295 TCGv_ptr t_pg;
500d0484 5296 TCGv_i32 t_desc;
206adacf 5297 int desc = 0;
c4e7c493 5298
206adacf
RH
5299 /*
5300 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
5301 * registers as pointers, so encode the regno into the data field.
5302 * For consistency, do this even for LD1.
5303 */
9473d0ec 5304 if (s->mte_active[0]) {
206adacf
RH
5305 int msz = dtype_msz(dtype);
5306
5307 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5308 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5309 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5310 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5311 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
206adacf 5312 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
5313 } else {
5314 addr = clean_data_tbi(s, addr);
206adacf 5315 }
9473d0ec 5316
206adacf 5317 desc = simd_desc(vsz, vsz, zt | desc);
500d0484 5318 t_desc = tcg_const_i32(desc);
c4e7c493
RH
5319 t_pg = tcg_temp_new_ptr();
5320
5321 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
500d0484 5322 fn(cpu_env, t_pg, addr, t_desc);
c4e7c493
RH
5323
5324 tcg_temp_free_ptr(t_pg);
500d0484 5325 tcg_temp_free_i32(t_desc);
c4e7c493
RH
5326}
5327
/*
 * Indexed by [mte][be][dtype][nreg]: nreg selects the LD1..LD4 helper
 * (index 0 == single register).  NULL entries are dtype/nreg combinations
 * with no instruction encoding (extending loads are single-register only).
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
5446
c4e7c493
RH
5447static void do_ld_zpa(DisasContext *s, int zt, int pg,
5448 TCGv_i64 addr, int dtype, int nreg)
5449{
206adacf 5450 gen_helper_gvec_mem *fn
c182c6db 5451 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 5452
206adacf
RH
5453 /*
5454 * While there are holes in the table, they are not
c4e7c493
RH
5455 * accessible via the instruction encoding.
5456 */
5457 assert(fn != NULL);
206adacf 5458 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
5459}
5460
3a7be554 5461static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
5462{
5463 if (a->rm == 31) {
5464 return false;
5465 }
5466 if (sve_access_check(s)) {
5467 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5468 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
5469 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5470 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5471 }
5472 return true;
5473}
5474
3a7be554 5475static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
5476{
5477 if (sve_access_check(s)) {
5478 int vsz = vec_full_reg_size(s);
5479 int elements = vsz >> dtype_esz[a->dtype];
5480 TCGv_i64 addr = new_tmp_a64(s);
5481
5482 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5483 (a->imm * elements * (a->nreg + 1))
5484 << dtype_msz(a->dtype));
5485 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5486 }
5487 return true;
5488}
e2654d75 5489
/* LDFF1 (first-fault, scalar plus scalar). */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    /* Indexed by [mte][be][dtype]; first-fault loads are LD1-only. */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
5587
/* LDNF1 (non-fault, scalar plus immediate). */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    /* Indexed by [mte][be][dtype]; non-fault loads are LD1-only. */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
1a039c7e 5688
/*
 * Load one 16-byte quadword and replicate it across the vector (LD1RQ).
 * The load itself reuses the contiguous LD1 helpers with VQ forced to 1
 * via simd_desc(16, 16, zt).
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers. */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /* Address the low 16 bits within the big-endian i64 in memory. */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    tcg_temp_free_ptr(t_pg);

    /* Replicate that first quadword. */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}
5730
3a7be554 5731static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5732{
5733 if (a->rm == 31) {
5734 return false;
5735 }
5736 if (sve_access_check(s)) {
5737 int msz = dtype_msz(a->dtype);
5738 TCGv_i64 addr = new_tmp_a64(s);
5739 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5740 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5741 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5742 }
5743 return true;
5744}
5745
3a7be554 5746static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5747{
5748 if (sve_access_check(s)) {
5749 TCGv_i64 addr = new_tmp_a64(s);
5750 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5751 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5752 }
5753 return true;
5754}
5755
/*
 * Load one 32-byte octaword and replicate it across the vector (LD1RO).
 * The load reuses the contiguous LD1 helpers with VQ lowered to 2 via
 * simd_desc(32, 32, zt).
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers. */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /* Address the low 32 bits within the big-endian i64 in memory. */
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}
5818
5819static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5820{
5821 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5822 return false;
5823 }
5824 if (a->rm == 31) {
5825 return false;
5826 }
5827 if (sve_access_check(s)) {
5828 TCGv_i64 addr = new_tmp_a64(s);
5829 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5830 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5831 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5832 }
5833 return true;
5834}
5835
5836static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5837{
5838 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5839 return false;
5840 }
5841 if (sve_access_check(s)) {
5842 TCGv_i64 addr = new_tmp_a64(s);
5843 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5844 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5845 }
5846 return true;
5847}
5848
/*
 * LD1R: load one element and broadcast it to all vector elements.
 * If no predicate bit is active, the load is skipped entirely (branch
 * to "over") and the destination is zeroed by do_movz_zpz below.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Wider predicates: a negative "last active" index means none. */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
5900
/*
 * Dispatch a contiguous predicated store (ST1..ST4) to the right helper.
 * nreg == 0 selects the ST1 form; otherwise nreg is 1..3 for ST2..ST4.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* ST1 helpers, indexed by [mte][be][msz][esz]; NULL if msz > esz. */
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    /* ST2/ST3/ST4 helpers, indexed by [mte][be][nreg - 1][msz]. */
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
6024
3a7be554 6025static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
6026{
6027 if (a->rm == 31 || a->msz > a->esz) {
6028 return false;
6029 }
6030 if (sve_access_check(s)) {
6031 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 6032 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
6033 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
6034 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6035 }
6036 return true;
6037}
6038
3a7be554 6039static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
6040{
6041 if (a->msz > a->esz) {
6042 return false;
6043 }
6044 if (sve_access_check(s)) {
6045 int vsz = vec_full_reg_size(s);
6046 int elements = vsz >> a->esz;
6047 TCGv_i64 addr = new_tmp_a64(s);
6048
6049 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
6050 (a->imm * elements * (a->nreg + 1)) << a->msz);
6051 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6052 }
6053 return true;
6054}
f6dbf62a
RH
6055
6056/*
6057 *** SVE gather loads / scatter stores
6058 */
6059
500d0484 6060static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 6061 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 6062 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
6063{
6064 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
6065 TCGv_ptr t_zm = tcg_temp_new_ptr();
6066 TCGv_ptr t_pg = tcg_temp_new_ptr();
6067 TCGv_ptr t_zt = tcg_temp_new_ptr();
500d0484 6068 TCGv_i32 t_desc;
d28d12f0 6069 int desc = 0;
500d0484 6070
d28d12f0
RH
6071 if (s->mte_active[0]) {
6072 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
6073 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
6074 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
6075 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 6076 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
6077 desc <<= SVE_MTEDESC_SHIFT;
6078 }
cdecb3fc 6079 desc = simd_desc(vsz, vsz, desc | scale);
500d0484 6080 t_desc = tcg_const_i32(desc);
f6dbf62a
RH
6081
6082 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
6083 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
6084 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
500d0484 6085 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
f6dbf62a
RH
6086
6087 tcg_temp_free_ptr(t_zt);
6088 tcg_temp_free_ptr(t_zm);
6089 tcg_temp_free_ptr(t_pg);
500d0484 6090 tcg_temp_free_i32(t_desc);
f6dbf62a
RH
6091}
6092
/*
 * 32-bit element gather loads.
 * Indexed by [mte][be][ff][xs][u][msz]:
 *   ff = first-fault, xs = signed offset extension, u = unsigned data,
 *   msz = log2 memory size.  NULL entries (signed word-to-word) have
 *   no encoding.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
6209
6210/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
6211static gen_helper_gvec_mem_scatter * const
6212gather_load_fn64[2][2][2][3][2][4] = {
6213 { /* MTE Inactive */
6214 { /* Little-endian */
6215 { { { gen_helper_sve_ldbds_zsu,
6216 gen_helper_sve_ldhds_le_zsu,
6217 gen_helper_sve_ldsds_le_zsu,
6218 NULL, },
6219 { gen_helper_sve_ldbdu_zsu,
6220 gen_helper_sve_ldhdu_le_zsu,
6221 gen_helper_sve_ldsdu_le_zsu,
6222 gen_helper_sve_lddd_le_zsu, } },
6223 { { gen_helper_sve_ldbds_zss,
6224 gen_helper_sve_ldhds_le_zss,
6225 gen_helper_sve_ldsds_le_zss,
6226 NULL, },
6227 { gen_helper_sve_ldbdu_zss,
6228 gen_helper_sve_ldhdu_le_zss,
6229 gen_helper_sve_ldsdu_le_zss,
6230 gen_helper_sve_lddd_le_zss, } },
6231 { { gen_helper_sve_ldbds_zd,
6232 gen_helper_sve_ldhds_le_zd,
6233 gen_helper_sve_ldsds_le_zd,
6234 NULL, },
6235 { gen_helper_sve_ldbdu_zd,
6236 gen_helper_sve_ldhdu_le_zd,
6237 gen_helper_sve_ldsdu_le_zd,
6238 gen_helper_sve_lddd_le_zd, } } },
6239
6240 /* First-fault */
6241 { { { gen_helper_sve_ldffbds_zsu,
6242 gen_helper_sve_ldffhds_le_zsu,
6243 gen_helper_sve_ldffsds_le_zsu,
6244 NULL, },
6245 { gen_helper_sve_ldffbdu_zsu,
6246 gen_helper_sve_ldffhdu_le_zsu,
6247 gen_helper_sve_ldffsdu_le_zsu,
6248 gen_helper_sve_ldffdd_le_zsu, } },
6249 { { gen_helper_sve_ldffbds_zss,
6250 gen_helper_sve_ldffhds_le_zss,
6251 gen_helper_sve_ldffsds_le_zss,
6252 NULL, },
6253 { gen_helper_sve_ldffbdu_zss,
6254 gen_helper_sve_ldffhdu_le_zss,
6255 gen_helper_sve_ldffsdu_le_zss,
6256 gen_helper_sve_ldffdd_le_zss, } },
6257 { { gen_helper_sve_ldffbds_zd,
6258 gen_helper_sve_ldffhds_le_zd,
6259 gen_helper_sve_ldffsds_le_zd,
6260 NULL, },
6261 { gen_helper_sve_ldffbdu_zd,
6262 gen_helper_sve_ldffhdu_le_zd,
6263 gen_helper_sve_ldffsdu_le_zd,
6264 gen_helper_sve_ldffdd_le_zd, } } } },
6265 { /* Big-endian */
6266 { { { gen_helper_sve_ldbds_zsu,
6267 gen_helper_sve_ldhds_be_zsu,
6268 gen_helper_sve_ldsds_be_zsu,
6269 NULL, },
6270 { gen_helper_sve_ldbdu_zsu,
6271 gen_helper_sve_ldhdu_be_zsu,
6272 gen_helper_sve_ldsdu_be_zsu,
6273 gen_helper_sve_lddd_be_zsu, } },
6274 { { gen_helper_sve_ldbds_zss,
6275 gen_helper_sve_ldhds_be_zss,
6276 gen_helper_sve_ldsds_be_zss,
6277 NULL, },
6278 { gen_helper_sve_ldbdu_zss,
6279 gen_helper_sve_ldhdu_be_zss,
6280 gen_helper_sve_ldsdu_be_zss,
6281 gen_helper_sve_lddd_be_zss, } },
6282 { { gen_helper_sve_ldbds_zd,
6283 gen_helper_sve_ldhds_be_zd,
6284 gen_helper_sve_ldsds_be_zd,
6285 NULL, },
6286 { gen_helper_sve_ldbdu_zd,
6287 gen_helper_sve_ldhdu_be_zd,
6288 gen_helper_sve_ldsdu_be_zd,
6289 gen_helper_sve_lddd_be_zd, } } },
6290
6291 /* First-fault */
6292 { { { gen_helper_sve_ldffbds_zsu,
6293 gen_helper_sve_ldffhds_be_zsu,
6294 gen_helper_sve_ldffsds_be_zsu,
6295 NULL, },
6296 { gen_helper_sve_ldffbdu_zsu,
6297 gen_helper_sve_ldffhdu_be_zsu,
6298 gen_helper_sve_ldffsdu_be_zsu,
6299 gen_helper_sve_ldffdd_be_zsu, } },
6300 { { gen_helper_sve_ldffbds_zss,
6301 gen_helper_sve_ldffhds_be_zss,
6302 gen_helper_sve_ldffsds_be_zss,
6303 NULL, },
6304 { gen_helper_sve_ldffbdu_zss,
6305 gen_helper_sve_ldffhdu_be_zss,
6306 gen_helper_sve_ldffsdu_be_zss,
6307 gen_helper_sve_ldffdd_be_zss, } },
6308 { { gen_helper_sve_ldffbds_zd,
6309 gen_helper_sve_ldffhds_be_zd,
6310 gen_helper_sve_ldffsds_be_zd,
6311 NULL, },
6312 { gen_helper_sve_ldffbdu_zd,
6313 gen_helper_sve_ldffhdu_be_zd,
6314 gen_helper_sve_ldffsdu_be_zd,
6315 gen_helper_sve_ldffdd_be_zd, } } } } },
6316 { /* MTE Active */
6317 { /* Little-endian */
6318 { { { gen_helper_sve_ldbds_zsu_mte,
6319 gen_helper_sve_ldhds_le_zsu_mte,
6320 gen_helper_sve_ldsds_le_zsu_mte,
6321 NULL, },
6322 { gen_helper_sve_ldbdu_zsu_mte,
6323 gen_helper_sve_ldhdu_le_zsu_mte,
6324 gen_helper_sve_ldsdu_le_zsu_mte,
6325 gen_helper_sve_lddd_le_zsu_mte, } },
6326 { { gen_helper_sve_ldbds_zss_mte,
6327 gen_helper_sve_ldhds_le_zss_mte,
6328 gen_helper_sve_ldsds_le_zss_mte,
6329 NULL, },
6330 { gen_helper_sve_ldbdu_zss_mte,
6331 gen_helper_sve_ldhdu_le_zss_mte,
6332 gen_helper_sve_ldsdu_le_zss_mte,
6333 gen_helper_sve_lddd_le_zss_mte, } },
6334 { { gen_helper_sve_ldbds_zd_mte,
6335 gen_helper_sve_ldhds_le_zd_mte,
6336 gen_helper_sve_ldsds_le_zd_mte,
6337 NULL, },
6338 { gen_helper_sve_ldbdu_zd_mte,
6339 gen_helper_sve_ldhdu_le_zd_mte,
6340 gen_helper_sve_ldsdu_le_zd_mte,
6341 gen_helper_sve_lddd_le_zd_mte, } } },
6342
6343 /* First-fault */
6344 { { { gen_helper_sve_ldffbds_zsu_mte,
6345 gen_helper_sve_ldffhds_le_zsu_mte,
6346 gen_helper_sve_ldffsds_le_zsu_mte,
6347 NULL, },
6348 { gen_helper_sve_ldffbdu_zsu_mte,
6349 gen_helper_sve_ldffhdu_le_zsu_mte,
6350 gen_helper_sve_ldffsdu_le_zsu_mte,
6351 gen_helper_sve_ldffdd_le_zsu_mte, } },
6352 { { gen_helper_sve_ldffbds_zss_mte,
6353 gen_helper_sve_ldffhds_le_zss_mte,
6354 gen_helper_sve_ldffsds_le_zss_mte,
6355 NULL, },
6356 { gen_helper_sve_ldffbdu_zss_mte,
6357 gen_helper_sve_ldffhdu_le_zss_mte,
6358 gen_helper_sve_ldffsdu_le_zss_mte,
6359 gen_helper_sve_ldffdd_le_zss_mte, } },
6360 { { gen_helper_sve_ldffbds_zd_mte,
6361 gen_helper_sve_ldffhds_le_zd_mte,
6362 gen_helper_sve_ldffsds_le_zd_mte,
6363 NULL, },
6364 { gen_helper_sve_ldffbdu_zd_mte,
6365 gen_helper_sve_ldffhdu_le_zd_mte,
6366 gen_helper_sve_ldffsdu_le_zd_mte,
6367 gen_helper_sve_ldffdd_le_zd_mte, } } } },
6368 { /* Big-endian */
6369 { { { gen_helper_sve_ldbds_zsu_mte,
6370 gen_helper_sve_ldhds_be_zsu_mte,
6371 gen_helper_sve_ldsds_be_zsu_mte,
6372 NULL, },
6373 { gen_helper_sve_ldbdu_zsu_mte,
6374 gen_helper_sve_ldhdu_be_zsu_mte,
6375 gen_helper_sve_ldsdu_be_zsu_mte,
6376 gen_helper_sve_lddd_be_zsu_mte, } },
6377 { { gen_helper_sve_ldbds_zss_mte,
6378 gen_helper_sve_ldhds_be_zss_mte,
6379 gen_helper_sve_ldsds_be_zss_mte,
6380 NULL, },
6381 { gen_helper_sve_ldbdu_zss_mte,
6382 gen_helper_sve_ldhdu_be_zss_mte,
6383 gen_helper_sve_ldsdu_be_zss_mte,
6384 gen_helper_sve_lddd_be_zss_mte, } },
6385 { { gen_helper_sve_ldbds_zd_mte,
6386 gen_helper_sve_ldhds_be_zd_mte,
6387 gen_helper_sve_ldsds_be_zd_mte,
6388 NULL, },
6389 { gen_helper_sve_ldbdu_zd_mte,
6390 gen_helper_sve_ldhdu_be_zd_mte,
6391 gen_helper_sve_ldsdu_be_zd_mte,
6392 gen_helper_sve_lddd_be_zd_mte, } } },
6393
6394 /* First-fault */
6395 { { { gen_helper_sve_ldffbds_zsu_mte,
6396 gen_helper_sve_ldffhds_be_zsu_mte,
6397 gen_helper_sve_ldffsds_be_zsu_mte,
6398 NULL, },
6399 { gen_helper_sve_ldffbdu_zsu_mte,
6400 gen_helper_sve_ldffhdu_be_zsu_mte,
6401 gen_helper_sve_ldffsdu_be_zsu_mte,
6402 gen_helper_sve_ldffdd_be_zsu_mte, } },
6403 { { gen_helper_sve_ldffbds_zss_mte,
6404 gen_helper_sve_ldffhds_be_zss_mte,
6405 gen_helper_sve_ldffsds_be_zss_mte,
6406 NULL, },
6407 { gen_helper_sve_ldffbdu_zss_mte,
6408 gen_helper_sve_ldffhdu_be_zss_mte,
6409 gen_helper_sve_ldffsdu_be_zss_mte,
6410 gen_helper_sve_ldffdd_be_zss_mte, } },
6411 { { gen_helper_sve_ldffbds_zd_mte,
6412 gen_helper_sve_ldffhds_be_zd_mte,
6413 gen_helper_sve_ldffsds_be_zd_mte,
6414 NULL, },
6415 { gen_helper_sve_ldffbdu_zd_mte,
6416 gen_helper_sve_ldffhdu_be_zd_mte,
6417 gen_helper_sve_ldffsdu_be_zd_mte,
6418 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
6419};
6420
3a7be554 6421static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
6422{
6423 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6424 bool be = s->be_data == MO_BE;
6425 bool mte = s->mte_active[0];
673e9fa6
RH
6426
6427 if (!sve_access_check(s)) {
6428 return true;
6429 }
6430
6431 switch (a->esz) {
6432 case MO_32:
d28d12f0 6433 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6434 break;
6435 case MO_64:
d28d12f0 6436 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6437 break;
6438 }
6439 assert(fn != NULL);
6440
6441 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6442 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
6443 return true;
6444}
6445
3a7be554 6446static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
6447{
6448 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6449 bool be = s->be_data == MO_BE;
6450 bool mte = s->mte_active[0];
673e9fa6
RH
6451 TCGv_i64 imm;
6452
6453 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
6454 return false;
6455 }
6456 if (!sve_access_check(s)) {
6457 return true;
6458 }
6459
6460 switch (a->esz) {
6461 case MO_32:
d28d12f0 6462 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
6463 break;
6464 case MO_64:
d28d12f0 6465 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
6466 break;
6467 }
6468 assert(fn != NULL);
6469
6470 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
6471 * by loading the immediate into the scalar parameter.
6472 */
6473 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 6474 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
673e9fa6
RH
6475 tcg_temp_free_i64(imm);
6476 return true;
6477}
6478
cf327449
SL
6479static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
6480{
b17ab470
RH
6481 gen_helper_gvec_mem_scatter *fn = NULL;
6482 bool be = s->be_data == MO_BE;
6483 bool mte = s->mte_active[0];
6484
6485 if (a->esz < a->msz + !a->u) {
6486 return false;
6487 }
cf327449
SL
6488 if (!dc_isar_feature(aa64_sve2, s)) {
6489 return false;
6490 }
b17ab470
RH
6491 if (!sve_access_check(s)) {
6492 return true;
6493 }
6494
6495 switch (a->esz) {
6496 case MO_32:
6497 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
6498 break;
6499 case MO_64:
6500 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
6501 break;
6502 }
6503 assert(fn != NULL);
6504
6505 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6506 cpu_reg(s, a->rm), a->msz, false, fn);
6507 return true;
cf327449
SL
6508}
6509
d28d12f0
RH
6510/* Indexed by [mte][be][xs][msz]. */
6511static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
6512 { /* MTE Inactive */
6513 { /* Little-endian */
6514 { gen_helper_sve_stbs_zsu,
6515 gen_helper_sve_sths_le_zsu,
6516 gen_helper_sve_stss_le_zsu, },
6517 { gen_helper_sve_stbs_zss,
6518 gen_helper_sve_sths_le_zss,
6519 gen_helper_sve_stss_le_zss, } },
6520 { /* Big-endian */
6521 { gen_helper_sve_stbs_zsu,
6522 gen_helper_sve_sths_be_zsu,
6523 gen_helper_sve_stss_be_zsu, },
6524 { gen_helper_sve_stbs_zss,
6525 gen_helper_sve_sths_be_zss,
6526 gen_helper_sve_stss_be_zss, } } },
6527 { /* MTE Active */
6528 { /* Little-endian */
6529 { gen_helper_sve_stbs_zsu_mte,
6530 gen_helper_sve_sths_le_zsu_mte,
6531 gen_helper_sve_stss_le_zsu_mte, },
6532 { gen_helper_sve_stbs_zss_mte,
6533 gen_helper_sve_sths_le_zss_mte,
6534 gen_helper_sve_stss_le_zss_mte, } },
6535 { /* Big-endian */
6536 { gen_helper_sve_stbs_zsu_mte,
6537 gen_helper_sve_sths_be_zsu_mte,
6538 gen_helper_sve_stss_be_zsu_mte, },
6539 { gen_helper_sve_stbs_zss_mte,
6540 gen_helper_sve_sths_be_zss_mte,
6541 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
6542};
6543
6544/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
6545static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
6546 { /* MTE Inactive */
6547 { /* Little-endian */
6548 { gen_helper_sve_stbd_zsu,
6549 gen_helper_sve_sthd_le_zsu,
6550 gen_helper_sve_stsd_le_zsu,
6551 gen_helper_sve_stdd_le_zsu, },
6552 { gen_helper_sve_stbd_zss,
6553 gen_helper_sve_sthd_le_zss,
6554 gen_helper_sve_stsd_le_zss,
6555 gen_helper_sve_stdd_le_zss, },
6556 { gen_helper_sve_stbd_zd,
6557 gen_helper_sve_sthd_le_zd,
6558 gen_helper_sve_stsd_le_zd,
6559 gen_helper_sve_stdd_le_zd, } },
6560 { /* Big-endian */
6561 { gen_helper_sve_stbd_zsu,
6562 gen_helper_sve_sthd_be_zsu,
6563 gen_helper_sve_stsd_be_zsu,
6564 gen_helper_sve_stdd_be_zsu, },
6565 { gen_helper_sve_stbd_zss,
6566 gen_helper_sve_sthd_be_zss,
6567 gen_helper_sve_stsd_be_zss,
6568 gen_helper_sve_stdd_be_zss, },
6569 { gen_helper_sve_stbd_zd,
6570 gen_helper_sve_sthd_be_zd,
6571 gen_helper_sve_stsd_be_zd,
6572 gen_helper_sve_stdd_be_zd, } } },
6573 { /* MTE Inactive */
6574 { /* Little-endian */
6575 { gen_helper_sve_stbd_zsu_mte,
6576 gen_helper_sve_sthd_le_zsu_mte,
6577 gen_helper_sve_stsd_le_zsu_mte,
6578 gen_helper_sve_stdd_le_zsu_mte, },
6579 { gen_helper_sve_stbd_zss_mte,
6580 gen_helper_sve_sthd_le_zss_mte,
6581 gen_helper_sve_stsd_le_zss_mte,
6582 gen_helper_sve_stdd_le_zss_mte, },
6583 { gen_helper_sve_stbd_zd_mte,
6584 gen_helper_sve_sthd_le_zd_mte,
6585 gen_helper_sve_stsd_le_zd_mte,
6586 gen_helper_sve_stdd_le_zd_mte, } },
6587 { /* Big-endian */
6588 { gen_helper_sve_stbd_zsu_mte,
6589 gen_helper_sve_sthd_be_zsu_mte,
6590 gen_helper_sve_stsd_be_zsu_mte,
6591 gen_helper_sve_stdd_be_zsu_mte, },
6592 { gen_helper_sve_stbd_zss_mte,
6593 gen_helper_sve_sthd_be_zss_mte,
6594 gen_helper_sve_stsd_be_zss_mte,
6595 gen_helper_sve_stdd_be_zss_mte, },
6596 { gen_helper_sve_stbd_zd_mte,
6597 gen_helper_sve_sthd_be_zd_mte,
6598 gen_helper_sve_stsd_be_zd_mte,
6599 gen_helper_sve_stdd_be_zd_mte, } } },
408ecde9
RH
6600};
6601
3a7be554 6602static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 6603{
f6dbf62a 6604 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
6605 bool be = s->be_data == MO_BE;
6606 bool mte = s->mte_active[0];
f6dbf62a
RH
6607
6608 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6609 return false;
6610 }
6611 if (!sve_access_check(s)) {
6612 return true;
6613 }
6614 switch (a->esz) {
6615 case MO_32:
d28d12f0 6616 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
6617 break;
6618 case MO_64:
d28d12f0 6619 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
6620 break;
6621 default:
6622 g_assert_not_reached();
6623 }
6624 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6625 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
6626 return true;
6627}
dec6cf6b 6628
3a7be554 6629static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
6630{
6631 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6632 bool be = s->be_data == MO_BE;
6633 bool mte = s->mte_active[0];
408ecde9
RH
6634 TCGv_i64 imm;
6635
6636 if (a->esz < a->msz) {
6637 return false;
6638 }
6639 if (!sve_access_check(s)) {
6640 return true;
6641 }
6642
6643 switch (a->esz) {
6644 case MO_32:
d28d12f0 6645 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
6646 break;
6647 case MO_64:
d28d12f0 6648 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
6649 break;
6650 }
6651 assert(fn != NULL);
6652
6653 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6654 * by loading the immediate into the scalar parameter.
6655 */
6656 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 6657 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
408ecde9
RH
6658 tcg_temp_free_i64(imm);
6659 return true;
6660}
6661
6ebca45f
SL
6662static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
6663{
b17ab470
RH
6664 gen_helper_gvec_mem_scatter *fn;
6665 bool be = s->be_data == MO_BE;
6666 bool mte = s->mte_active[0];
6667
6668 if (a->esz < a->msz) {
6669 return false;
6670 }
6ebca45f
SL
6671 if (!dc_isar_feature(aa64_sve2, s)) {
6672 return false;
6673 }
b17ab470
RH
6674 if (!sve_access_check(s)) {
6675 return true;
6676 }
6677
6678 switch (a->esz) {
6679 case MO_32:
6680 fn = scatter_store_fn32[mte][be][0][a->msz];
6681 break;
6682 case MO_64:
6683 fn = scatter_store_fn64[mte][be][2][a->msz];
6684 break;
6685 default:
6686 g_assert_not_reached();
6687 }
6688
6689 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6690 cpu_reg(s, a->rm), a->msz, true, fn);
6691 return true;
6ebca45f
SL
6692}
6693
dec6cf6b
RH
6694/*
6695 * Prefetches
6696 */
6697
3a7be554 6698static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
6699{
6700 /* Prefetch is a nop within QEMU. */
2f95a3b0 6701 (void)sve_access_check(s);
dec6cf6b
RH
6702 return true;
6703}
6704
3a7be554 6705static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
6706{
6707 if (a->rm == 31) {
6708 return false;
6709 }
6710 /* Prefetch is a nop within QEMU. */
2f95a3b0 6711 (void)sve_access_check(s);
dec6cf6b
RH
6712 return true;
6713}
a2103582
RH
6714
6715/*
6716 * Move Prefix
6717 *
6718 * TODO: The implementation so far could handle predicated merging movprfx.
6719 * The helper functions as written take an extra source register to
6720 * use in the operation, but the result is only written when predication
6721 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
6722 * to allow the final write back to the destination to be unconditional.
6723 * For predicated zeroing movprfx, we need to rearrange the helpers to
6724 * allow the final write back to zero inactives.
6725 *
6726 * In the meantime, just emit the moves.
6727 */
6728
3a7be554 6729static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
a2103582
RH
6730{
6731 return do_mov_z(s, a->rd, a->rn);
6732}
6733
3a7be554 6734static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
6735{
6736 if (sve_access_check(s)) {
6737 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
6738 }
6739 return true;
6740}
6741
3a7be554 6742static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
a2103582 6743{
60245996 6744 return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
a2103582 6745}
5dad1ba5
RH
6746
6747/*
6748 * SVE2 Integer Multiply - Unpredicated
6749 */
6750
6751static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
6752{
6753 if (!dc_isar_feature(aa64_sve2, s)) {
6754 return false;
6755 }
6756 if (sve_access_check(s)) {
6757 gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
6758 }
6759 return true;
6760}
6761
6762static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a,
6763 gen_helper_gvec_3 *fn)
6764{
6765 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6766 return false;
6767 }
6768 if (sve_access_check(s)) {
6769 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
6770 }
6771 return true;
6772}
6773
6774static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6775{
6776 static gen_helper_gvec_3 * const fns[4] = {
6777 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
6778 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
6779 };
6780 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6781}
6782
6783static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6784{
6785 static gen_helper_gvec_3 * const fns[4] = {
6786 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
6787 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
6788 };
6789 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6790}
6791
6792static bool trans_PMUL_zzz(DisasContext *s, arg_rrr_esz *a)
6793{
6794 return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b);
6795}
d4b1e59d 6796
169d7c58
RH
6797static bool trans_SQDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6798{
6799 static gen_helper_gvec_3 * const fns[4] = {
6800 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
6801 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
6802 };
6803 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6804}
6805
6806static bool trans_SQRDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6807{
6808 static gen_helper_gvec_3 * const fns[4] = {
6809 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
6810 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
6811 };
6812 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6813}
6814
d4b1e59d
RH
6815/*
6816 * SVE2 Integer - Predicated
6817 */
6818
6819static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
6820 gen_helper_gvec_4 *fn)
6821{
6822 if (!dc_isar_feature(aa64_sve2, s)) {
6823 return false;
6824 }
6825 return do_zpzz_ool(s, a, fn);
6826}
6827
6828static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6829{
6830 static gen_helper_gvec_4 * const fns[3] = {
6831 gen_helper_sve2_sadalp_zpzz_h,
6832 gen_helper_sve2_sadalp_zpzz_s,
6833 gen_helper_sve2_sadalp_zpzz_d,
6834 };
6835 if (a->esz == 0) {
6836 return false;
6837 }
6838 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6839}
6840
6841static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6842{
6843 static gen_helper_gvec_4 * const fns[3] = {
6844 gen_helper_sve2_uadalp_zpzz_h,
6845 gen_helper_sve2_uadalp_zpzz_s,
6846 gen_helper_sve2_uadalp_zpzz_d,
6847 };
6848 if (a->esz == 0) {
6849 return false;
6850 }
6851 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6852}
db366da8
RH
6853
6854/*
6855 * SVE2 integer unary operations (predicated)
6856 */
6857
6858static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a,
6859 gen_helper_gvec_3 *fn)
6860{
6861 if (!dc_isar_feature(aa64_sve2, s)) {
6862 return false;
6863 }
6864 return do_zpz_ool(s, a, fn);
6865}
6866
6867static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a)
6868{
6869 if (a->esz != 2) {
6870 return false;
6871 }
6872 return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s);
6873}
6874
6875static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a)
6876{
6877 if (a->esz != 2) {
6878 return false;
6879 }
6880 return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s);
6881}
6882
6883static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a)
6884{
6885 static gen_helper_gvec_3 * const fns[4] = {
6886 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
6887 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
6888 };
6889 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6890}
6891
6892static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a)
6893{
6894 static gen_helper_gvec_3 * const fns[4] = {
6895 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
6896 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
6897 };
6898 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6899}
45d9503d
RH
6900
6901#define DO_SVE2_ZPZZ(NAME, name) \
6902static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
6903{ \
6904 static gen_helper_gvec_4 * const fns[4] = { \
6905 gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
6906 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
6907 }; \
6908 return do_sve2_zpzz_ool(s, a, fns[a->esz]); \
6909}
6910
6911DO_SVE2_ZPZZ(SQSHL, sqshl)
6912DO_SVE2_ZPZZ(SQRSHL, sqrshl)
6913DO_SVE2_ZPZZ(SRSHL, srshl)
6914
6915DO_SVE2_ZPZZ(UQSHL, uqshl)
6916DO_SVE2_ZPZZ(UQRSHL, uqrshl)
6917DO_SVE2_ZPZZ(URSHL, urshl)
a47dc220
RH
6918
6919DO_SVE2_ZPZZ(SHADD, shadd)
6920DO_SVE2_ZPZZ(SRHADD, srhadd)
6921DO_SVE2_ZPZZ(SHSUB, shsub)
6922
6923DO_SVE2_ZPZZ(UHADD, uhadd)
6924DO_SVE2_ZPZZ(URHADD, urhadd)
6925DO_SVE2_ZPZZ(UHSUB, uhsub)
8597dc8b
RH
6926
6927DO_SVE2_ZPZZ(ADDP, addp)
6928DO_SVE2_ZPZZ(SMAXP, smaxp)
6929DO_SVE2_ZPZZ(UMAXP, umaxp)
6930DO_SVE2_ZPZZ(SMINP, sminp)
6931DO_SVE2_ZPZZ(UMINP, uminp)
4f07fbeb
RH
6932
6933DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
6934DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
6935DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
6936DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
6937DO_SVE2_ZPZZ(SUQADD, suqadd)
6938DO_SVE2_ZPZZ(USQADD, usqadd)
0ce1dda8
RH
6939
6940/*
6941 * SVE2 Widening Integer Arithmetic
6942 */
6943
6944static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a,
6945 gen_helper_gvec_3 *fn, int data)
6946{
6947 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6948 return false;
6949 }
6950 if (sve_access_check(s)) {
6951 unsigned vsz = vec_full_reg_size(s);
6952 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
6953 vec_full_reg_offset(s, a->rn),
6954 vec_full_reg_offset(s, a->rm),
6955 vsz, vsz, data, fn);
6956 }
6957 return true;
6958}
6959
6960#define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \
6961static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
6962{ \
6963 static gen_helper_gvec_3 * const fns[4] = { \
6964 NULL, gen_helper_sve2_##name##_h, \
6965 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
6966 }; \
6967 return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1); \
6968}
6969
6970DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false)
6971DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false)
6972DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false)
6973
6974DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false)
6975DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false)
6976DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false)
6977
6978DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true)
6979DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true)
6980DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true)
6981
6982DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true)
6983DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true)
6984DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true)
daec426b
RH
6985
6986DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true)
6987DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true)
6988DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false)
81fccf09 6989
69ccc099
RH
6990DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false)
6991DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true)
6992
6993DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false)
6994DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true)
6995
6996DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false)
6997DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true)
6998
2df3ca55
RH
6999static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1)
7000{
7001 static gen_helper_gvec_3 * const fns[4] = {
7002 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
7003 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
7004 };
7005 return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1);
7006}
7007
7008static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a)
7009{
7010 return do_eor_tb(s, a, false);
7011}
7012
7013static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a)
7014{
7015 return do_eor_tb(s, a, true);
7016}
7017
e3a56131
RH
7018static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
7019{
7020 static gen_helper_gvec_3 * const fns[4] = {
7021 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
7022 NULL, gen_helper_sve2_pmull_d,
7023 };
7024 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
7025 return false;
7026 }
7027 return do_sve2_zzw_ool(s, a, fns[a->esz], sel);
7028}
7029
7030static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a)
7031{
7032 return do_trans_pmull(s, a, false);
7033}
7034
7035static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a)
7036{
7037 return do_trans_pmull(s, a, true);
7038}
7039
81fccf09
RH
7040#define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \
7041static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
7042{ \
7043 static gen_helper_gvec_3 * const fns[4] = { \
7044 NULL, gen_helper_sve2_##name##_h, \
7045 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
7046 }; \
7047 return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2); \
7048}
7049
7050DO_SVE2_ZZZ_WTB(SADDWB, saddw, false)
7051DO_SVE2_ZZZ_WTB(SADDWT, saddw, true)
7052DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false)
7053DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true)
7054
7055DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false)
7056DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true)
7057DO_SVE2_ZZZ_WTB(USUBWB, usubw, false)
7058DO_SVE2_ZZZ_WTB(USUBWT, usubw, true)
4269fef1
RH
7059
7060static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
7061{
7062 int top = imm & 1;
7063 int shl = imm >> 1;
7064 int halfbits = 4 << vece;
7065
7066 if (top) {
7067 if (shl == halfbits) {
7068 TCGv_vec t = tcg_temp_new_vec_matching(d);
7069 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
7070 tcg_gen_and_vec(vece, d, n, t);
7071 tcg_temp_free_vec(t);
7072 } else {
7073 tcg_gen_sari_vec(vece, d, n, halfbits);
7074 tcg_gen_shli_vec(vece, d, d, shl);
7075 }
7076 } else {
7077 tcg_gen_shli_vec(vece, d, n, halfbits);
7078 tcg_gen_sari_vec(vece, d, d, halfbits - shl);
7079 }
7080}
7081
7082static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
7083{
7084 int halfbits = 4 << vece;
7085 int top = imm & 1;
7086 int shl = (imm >> 1);
7087 int shift;
7088 uint64_t mask;
7089
7090 mask = MAKE_64BIT_MASK(0, halfbits);
7091 mask <<= shl;
7092 mask = dup_const(vece, mask);
7093
7094 shift = shl - top * halfbits;
7095 if (shift < 0) {
7096 tcg_gen_shri_i64(d, n, -shift);
7097 } else {
7098 tcg_gen_shli_i64(d, n, shift);
7099 }
7100 tcg_gen_andi_i64(d, d, mask);
7101}
7102
7103static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7104{
7105 gen_ushll_i64(MO_16, d, n, imm);
7106}
7107
7108static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7109{
7110 gen_ushll_i64(MO_32, d, n, imm);
7111}
7112
7113static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7114{
7115 gen_ushll_i64(MO_64, d, n, imm);
7116}
7117
7118static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
7119{
7120 int halfbits = 4 << vece;
7121 int top = imm & 1;
7122 int shl = imm >> 1;
7123
7124 if (top) {
7125 if (shl == halfbits) {
7126 TCGv_vec t = tcg_temp_new_vec_matching(d);
7127 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
7128 tcg_gen_and_vec(vece, d, n, t);
7129 tcg_temp_free_vec(t);
7130 } else {
7131 tcg_gen_shri_vec(vece, d, n, halfbits);
7132 tcg_gen_shli_vec(vece, d, d, shl);
7133 }
7134 } else {
7135 if (shl == 0) {
7136 TCGv_vec t = tcg_temp_new_vec_matching(d);
7137 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7138 tcg_gen_and_vec(vece, d, n, t);
7139 tcg_temp_free_vec(t);
7140 } else {
7141 tcg_gen_shli_vec(vece, d, n, halfbits);
7142 tcg_gen_shri_vec(vece, d, d, halfbits - shl);
7143 }
7144 }
7145}
7146
7147static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
7148 bool sel, bool uns)
7149{
7150 static const TCGOpcode sshll_list[] = {
7151 INDEX_op_shli_vec, INDEX_op_sari_vec, 0
7152 };
7153 static const TCGOpcode ushll_list[] = {
7154 INDEX_op_shli_vec, INDEX_op_shri_vec, 0
7155 };
7156 static const GVecGen2i ops[2][3] = {
7157 { { .fniv = gen_sshll_vec,
7158 .opt_opc = sshll_list,
7159 .fno = gen_helper_sve2_sshll_h,
7160 .vece = MO_16 },
7161 { .fniv = gen_sshll_vec,
7162 .opt_opc = sshll_list,
7163 .fno = gen_helper_sve2_sshll_s,
7164 .vece = MO_32 },
7165 { .fniv = gen_sshll_vec,
7166 .opt_opc = sshll_list,
7167 .fno = gen_helper_sve2_sshll_d,
7168 .vece = MO_64 } },
7169 { { .fni8 = gen_ushll16_i64,
7170 .fniv = gen_ushll_vec,
7171 .opt_opc = ushll_list,
7172 .fno = gen_helper_sve2_ushll_h,
7173 .vece = MO_16 },
7174 { .fni8 = gen_ushll32_i64,
7175 .fniv = gen_ushll_vec,
7176 .opt_opc = ushll_list,
7177 .fno = gen_helper_sve2_ushll_s,
7178 .vece = MO_32 },
7179 { .fni8 = gen_ushll64_i64,
7180 .fniv = gen_ushll_vec,
7181 .opt_opc = ushll_list,
7182 .fno = gen_helper_sve2_ushll_d,
7183 .vece = MO_64 } },
7184 };
7185
7186 if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
7187 return false;
7188 }
7189 if (sve_access_check(s)) {
7190 unsigned vsz = vec_full_reg_size(s);
7191 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
7192 vec_full_reg_offset(s, a->rn),
7193 vsz, vsz, (a->imm << 1) | sel,
7194 &ops[uns][a->esz]);
7195 }
7196 return true;
7197}
7198
7199static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
7200{
7201 return do_sve2_shll_tb(s, a, false, false);
7202}
7203
7204static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
7205{
7206 return do_sve2_shll_tb(s, a, true, false);
7207}
7208
7209static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
7210{
7211 return do_sve2_shll_tb(s, a, false, true);
7212}
7213
7214static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
7215{
7216 return do_sve2_shll_tb(s, a, true, true);
7217}
cb9c33b8
RH
7218
/*
 * SVE2 bit-permute insns (BEXT/BDEP/BGRP).  These are gated on the
 * optional SVE2BitPerm feature rather than plain SVE2, and have no
 * inline expansion: each goes straight to an out-of-line helper
 * selected by element size.
 */
static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
        gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}

static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
        gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}

static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
        gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}
ed4a6387
RH
7254
/*
 * Expand CADD/SQCADD (complex integer add with rotate).  'sq' selects
 * the saturating form; 'rot' distinguishes the #90 and #270 rotations
 * and is forwarded to the helper as its data argument.
 */
static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot)
{
    static gen_helper_gvec_3 * const fns[2][4] = {
        { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
          gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d },
        { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
          gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d },
    };
    return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot);
}

static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, false);
}

static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, true);
}

static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, false);
}

static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, true);
}
38650638
RH
7285
/*
 * Expand an SVE2 four-register operation via an out-of-line helper.
 * Returns false (unallocated encoding) when fn is NULL -- used by
 * callers to mark an invalid element size -- or when SVE2 is not
 * implemented.  'data' is passed through in the simd descriptor.
 */
static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
                             gen_helper_gvec_4 *fn, int data)
{
    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
    }
    return true;
}
7297
/*
 * Expand SABAL/UABAL (widening absolute-difference-and-accumulate).
 * Byte elements are invalid (NULL entry).  'uns' selects the unsigned
 * form; 'sel' picks the bottom or top half of each source element and
 * is passed to the helper as data.
 */
static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
{
    static gen_helper_gvec_4 * const fns[2][4] = {
        { NULL, gen_helper_sve2_sabal_h,
          gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d },
        { NULL, gen_helper_sve2_uabal_h,
          gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d },
    };
    return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel);
}

static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, false);
}

static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, true);
}

static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, false);
}

static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, true);
}
b8295dfb
RH
7328
7329static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
7330{
7331 static gen_helper_gvec_4 * const fns[2] = {
7332 gen_helper_sve2_adcl_s,
7333 gen_helper_sve2_adcl_d,
7334 };
7335 /*
7336 * Note that in this case the ESZ field encodes both size and sign.
7337 * Split out 'subtract' into bit 1 of the data field for the helper.
7338 */
7339 return do_sve2_zzzz_ool(s, a, fns[a->esz & 1], (a->esz & 2) | sel);
7340}
7341
7342static bool trans_ADCLB(DisasContext *s, arg_rrrr_esz *a)
7343{
7344 return do_adcl(s, a, false);
7345}
7346
7347static bool trans_ADCLT(DisasContext *s, arg_rrrr_esz *a)
7348{
7349 return do_adcl(s, a, true);
7350}
a7e3a90e
RH
7351
/*
 * Expand an SVE2 two-register-plus-immediate operation using a gvec
 * expander shared with AdvSIMD (the shift-and-accumulate and
 * shift-and-insert insns below).  a->esz < 0 marks an invalid decode.
 */
static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
        unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
        fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ssra);
}

static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_usra);
}

static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_srsra);
}

static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ursra);
}

static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sri);
}

static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sli);
}
289a1797
RH
7395
/*
 * Expand an SVE2 three-register operation using a gvec expander
 * shared with AdvSIMD (SABA/UABA below).
 */
static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}

static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_saba);
}

static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
}
5ff2838d
RH
7416
/*
 * Expand an SVE2 narrowing extract (the XTN family).  a->esz is the
 * narrow destination element size and must be B/H/S; each ops[] entry
 * operates on the corresponding wide element (vece one step larger).
 * The immediate must be zero -- nonzero shifts belong to the
 * shift-narrow insns handled by do_sve2_shr_narrow() below.
 */
static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
                                   const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
        !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}
7432
/* Vector opcodes required by the inline SQXTNB/SQXTNT expansions. */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/*
 * Inline SQXTNB: clamp each wide element to the signed range of the
 * half-width type, then mask to the low half so that the odd (top)
 * halves of the destination are zeroed.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;                 /* bits in the narrow type */
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}
7453
/* SQXTNB: signed saturating extract narrow, bottom half. */
static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7472
/*
 * Inline SQXTNT: clamp each wide element of n to the signed narrow
 * range, shift the result into the top half, then merge with the
 * bottom halves already in d (bitsel keeps d where the mask is set).
 * Note that n is clobbered as a scratch register.
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7490
/*
 * SQXTNT: signed saturating extract narrow, top half.  load_dest is
 * required because the bottom halves of the destination are preserved.
 */
static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7512
/* Vector opcodes required by the inline UQXTNB/UQXTNT expansions. */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

/*
 * Inline UQXTNB: unsigned saturation is a single umin against the
 * narrow-type maximum; that also clears the top halves, so no
 * separate masking is needed.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7527
/* UQXTNB: unsigned saturating extract narrow, bottom half. */
static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7546
/*
 * Inline UQXTNT: saturate n to the unsigned narrow maximum, shift the
 * result into the top half, and merge with the bottom halves already
 * in d.  n is clobbered as a scratch register.
 */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7559
/* UQXTNT: unsigned saturating extract narrow, top half. */
static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7581
/* Vector opcodes required by the inline SQXTUNB/SQXTUNT expansions. */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

/*
 * Inline SQXTUNB: signed-to-unsigned saturation -- clamp below at 0
 * and above at the unsigned narrow maximum; the umin also clears the
 * top halves of the destination.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}
7598
/* SQXTUNB: signed saturating extract unsigned narrow, bottom half. */
static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7617
/*
 * Inline SQXTUNT: clamp n to [0, unsigned narrow max], shift into the
 * top half, and merge with the bottom halves already in d.  n is
 * clobbered as a scratch register.
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7632
/* SQXTUNT: signed saturating extract unsigned narrow, top half. */
static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7654
/*
 * Expand an SVE2 shift-right-and-narrow insn.  a->esz is the narrow
 * destination element size (B/H/S); the immediate shift count has
 * already been validated by decode, hence the assert rather than a
 * return false.
 */
static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
                               const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
7670
/*
 * Inline SHRNB on a 64-bit chunk: shift right, then mask to the low
 * half of each element so the top halves of the destination are zero.
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

/* Inline SHRNB on host vectors; n is clobbered as scratch. */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7706
/* SHRNB: shift right narrow, bottom half (no rounding, no saturation). */
static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnb16_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnb32_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnb64_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7729
/*
 * Inline SHRNT on a 64-bit chunk: align the shifted result with the
 * top half of each element (left shift by halfbits - shr), then merge
 * with the bottom halves preserved from d.  n is clobbered as scratch.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/*
 * For 64-bit elements halfbits == 32, so the operation is simply a
 * shift plus a deposit into the high 32 bits.
 */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

/* Inline SHRNT on host vectors; n is clobbered as scratch. */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7768
/* SHRNT: shift right narrow, top half; bottom halves are preserved. */
static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnt16_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnt32_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnt64_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7794
/*
 * RSHRNB/RSHRNT: rounding shift right narrow.  No inline expansion;
 * only out-of-line helpers are provided.
 */
static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnb_h },
        { .fno = gen_helper_sve2_rshrnb_s },
        { .fno = gen_helper_sve2_rshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnt_h },
        { .fno = gen_helper_sve2_rshrnt_s },
        { .fno = gen_helper_sve2_rshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7814
/*
 * Inline SQSHRUNB: arithmetic shift right, clamp below at zero, then
 * an unsigned min against the narrow maximum which both saturates and
 * clears the top halves.  n is clobbered as scratch.
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

/* SQSHRUNB: signed saturating shift right unsigned narrow, bottom. */
static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7850
/*
 * Inline SQSHRUNT: as SQSHRUNB, but the saturated result is shifted
 * into the top half and merged with the preserved bottom halves of d.
 * n is clobbered as scratch.
 */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* SQSHRUNT: signed saturating shift right unsigned narrow, top. */
static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7892
/*
 * SQRSHRUNB/SQRSHRUNT: rounding variants, out-of-line helpers only.
 */
static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunb_h },
        { .fno = gen_helper_sve2_sqrshrunb_s },
        { .fno = gen_helper_sve2_sqrshrunb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunt_h },
        { .fno = gen_helper_sve2_sqrshrunt_s },
        { .fno = gen_helper_sve2_sqrshrunt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7912
743bb147
RH
/*
 * Inline SQSHRNB: arithmetic shift right, clamp to the signed narrow
 * range, then mask to the low half of each element.  n is clobbered
 * as scratch.
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

/* SQSHRNB: signed saturating shift right narrow, bottom half. */
static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7952
/*
 * Inline SQSHRNT: as SQSHRNB, but the clamped result is shifted into
 * the top half and merged with the preserved bottom halves of d.
 * n is clobbered as scratch.
 */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* SQSHRNT: signed saturating shift right narrow, top half. */
static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7997
/*
 * SQRSHRNB/SQRSHRNT: rounding variants, out-of-line helpers only.
 */
static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnb_h },
        { .fno = gen_helper_sve2_sqrshrnb_s },
        { .fno = gen_helper_sve2_sqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnt_h },
        { .fno = gen_helper_sve2_sqrshrnt_s },
        { .fno = gen_helper_sve2_sqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
8017
c13418da
RH
/*
 * Inline UQSHRNB: logical shift right then unsigned min against the
 * narrow maximum, which saturates and clears the top halves in one
 * step.  n is clobbered as scratch.
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

/* UQSHRNB: unsigned saturating shift right narrow, bottom half. */
static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
8051
/*
 * Inline UQSHRNT: as UQSHRNB, but the saturated result is shifted
 * into the top half and merged with the preserved bottom halves of d.
 * n is clobbered as scratch.
 */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* UQSHRNT: unsigned saturating shift right narrow, top half. */
static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
8090
/*
 * UQRSHRNB/UQRSHRNT: rounding variants, out-of-line helpers only.
 */
static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnb_h },
        { .fno = gen_helper_sve2_uqrshrnb_s },
        { .fno = gen_helper_sve2_uqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnt_h },
        { .fno = gen_helper_sve2_uqrshrnt_s },
        { .fno = gen_helper_sve2_uqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
b87dbeeb 8110
40d5ea50
SL
/*
 * Expand the SVE2 narrowing add/subtract-high-half insns via
 * out-of-line helpers.  Byte element size is invalid (NULL entry);
 * do_sve2_zzz_ool() supplies the SVE2 feature and access checks.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)                 \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    return do_sve2_zzz_ool(s, a, fns[a->esz]);                            \
}

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
c3cd6766 8130
e0ae6ec3
SL
/*
 * As do_ppzz_flags(), but additionally gated on SVE2.
 */
static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                               gen_helper_gvec_flags_4 *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_ppzz_flags(s, a, fn);
}

/*
 * MATCH/NMATCH are defined only for byte and halfword elements
 * (NULL entries for word/doubleword).
 */
#define DO_SVE2_PPZZ_MATCH(NAME, name)                                      \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_flags_4 * const fns[4] = {                       \
        gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h,   \
        NULL,                            NULL                               \
    };                                                                      \
    return do_sve2_ppzz_flags(s, a, fns[a->esz]);                           \
}

DO_SVE2_PPZZ_MATCH(MATCH, match)
DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
8152
7d47ac94
SL
8153static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
8154{
8155 static gen_helper_gvec_4 * const fns[2] = {
8156 gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
8157 };
8158 if (a->esz < 2) {
8159 return false;
8160 }
8161 return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
8162}
8163
8164static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
8165{
8166 if (a->esz != 0) {
8167 return false;
8168 }
8169 return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
8170}
8171
b87dbeeb
SL
/*
 * As do_zpzz_fp(), but additionally gated on SVE2 -- used for the
 * SVE2 floating-point pairwise insns below.
 */
static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                            gen_helper_gvec_4_ptr *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzz_fp(s, a, fn);
}

/*
 * Floating-point pairwise ops: byte element size is invalid (NULL).
 */
#define DO_SVE2_ZPZZ_FP(NAME, name)                                         \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_4_ptr * const fns[4] = {                         \
        NULL,                            gen_helper_sve2_##name##_zpzz_h,   \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d    \
    };                                                                      \
    return do_sve2_zpzz_fp(s, a, fns[a->esz]);                              \
}

DO_SVE2_ZPZZ_FP(FADDP, faddp)
DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
DO_SVE2_ZPZZ_FP(FMINP, fminp)
bfc9307e
RH
8196
8197/*
8198 * SVE Integer Multiply-Add (unpredicated)
8199 */
8200
4f26756b
SL
/*
 * FMMLA: floating-point matrix multiply-accumulate.  The 32-bit and
 * 64-bit forms are gated on the separate F32MM/F64MM features rather
 * than SVE2 proper; all other element sizes are unallocated.
 */
static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    gen_helper_gvec_4_ptr *fn;

    switch (a->esz) {
    case MO_32:
        if (!dc_isar_feature(aa64_sve_f32mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_s;
        break;
    case MO_64:
        if (!dc_isar_feature(aa64_sve_f64mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_d;
        break;
    default:
        return false;
    }

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
8234
bfc9307e
RH
/*
 * SQDMLAL/SQDMLSL (widening saturating doubling multiply add/sub).
 * sel1/sel2 select the bottom (false) or top (true) half of the first
 * and second source elements respectively; the BT forms mix the two.
 * Byte elements are invalid (NULL entry).
 */
static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
        gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

static bool do_sqdmlsl_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
        gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

static bool trans_SQDMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, false);
}

static bool trans_SQDMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, true, true);
}

static bool trans_SQDMLALBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, true);
}

static bool trans_SQDMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, false);
}

static bool trans_SQDMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, true, true);
}

static bool trans_SQDMLSLBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, true);
}
ab3ddf31
RH
8284
/*
 * SQRDMLAH/SQRDMLSH (saturating rounding doubling multiply
 * accumulate/subtract): valid for all four element sizes.
 */
static bool trans_SQRDMLAH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
        gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}

static bool trans_SQRDMLSH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
        gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}
45a32e80
RH
8302
/*
 * Widening multiply-accumulate/subtract (SMLAL/UMLAL/SMLSL/UMLSL).
 * 'sel' picks the bottom (false) or top (true) half of each source
 * element and is passed to the helper as data.  Byte elements are
 * invalid (NULL entry).
 */
static bool do_smlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_smlal_zzzw_h,
        gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_SMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, false);
}

static bool trans_SMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, true);
}

static bool do_umlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_umlal_zzzw_h,
        gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_UMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, false);
}

static bool trans_UMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, true);
}

static bool do_smlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_smlsl_zzzw_h,
        gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_SMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, false);
}

static bool trans_SMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, true);
}

static bool do_umlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_umlsl_zzzw_h,
        gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_UMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, false);
}

static bool trans_UMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, true);
}
d782d3ca
RH
8378
8379static bool trans_CMLA_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8380{
8381 static gen_helper_gvec_4 * const fns[] = {
8382 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
8383 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
8384 };
8385
8386 if (!dc_isar_feature(aa64_sve2, s)) {
8387 return false;
8388 }
8389 if (sve_access_check(s)) {
8390 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8391 }
8392 return true;
8393}
8394
21068f39
RH
8395static bool trans_CDOT_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8396{
8397 if (!dc_isar_feature(aa64_sve2, s) || a->esz < MO_32) {
8398 return false;
8399 }
8400 if (sve_access_check(s)) {
8401 gen_helper_gvec_4 *fn = (a->esz == MO_32
8402 ? gen_helper_sve2_cdot_zzzz_s
8403 : gen_helper_sve2_cdot_zzzz_d);
8404 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->rot);
8405 }
8406 return true;
8407}
8408
d782d3ca
RH
8409static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_SQRDCMLAH_zzzz *a)
8410{
8411 static gen_helper_gvec_4 * const fns[] = {
8412 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
8413 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
8414 };
8415
8416 if (!dc_isar_feature(aa64_sve2, s)) {
8417 return false;
8418 }
8419 if (sve_access_check(s)) {
8420 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8421 }
8422 return true;
8423}
6a98cb2a
RH
8424
8425static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a)
8426{
8427 if (a->esz != 2 || !dc_isar_feature(aa64_sve_i8mm, s)) {
8428 return false;
8429 }
8430 if (sve_access_check(s)) {
8431 unsigned vsz = vec_full_reg_size(s);
8432 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
8433 vec_full_reg_offset(s, a->rn),
8434 vec_full_reg_offset(s, a->rm),
8435 vec_full_reg_offset(s, a->ra),
8436 vsz, vsz, 0, gen_helper_gvec_usdot_b);
8437 }
8438 return true;
8439}
b2bcd1be
RH
8440
8441static bool trans_AESMC(DisasContext *s, arg_AESMC *a)
8442{
8443 if (!dc_isar_feature(aa64_sve2_aes, s)) {
8444 return false;
8445 }
8446 if (sve_access_check(s)) {
8447 gen_gvec_ool_zz(s, gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt);
8448 }
8449 return true;
8450}
3cc7a88e
RH
8451
8452static bool do_aese(DisasContext *s, arg_rrr_esz *a, bool decrypt)
8453{
8454 if (!dc_isar_feature(aa64_sve2_aes, s)) {
8455 return false;
8456 }
8457 if (sve_access_check(s)) {
8458 gen_gvec_ool_zzz(s, gen_helper_crypto_aese,
8459 a->rd, a->rn, a->rm, decrypt);
8460 }
8461 return true;
8462}
8463
8464static bool trans_AESE(DisasContext *s, arg_rrr_esz *a)
8465{
8466 return do_aese(s, a, false);
8467}
8468
8469static bool trans_AESD(DisasContext *s, arg_rrr_esz *a)
8470{
8471 return do_aese(s, a, true);
8472}
8473
8474static bool do_sm4(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
8475{
8476 if (!dc_isar_feature(aa64_sve2_sm4, s)) {
8477 return false;
8478 }
8479 if (sve_access_check(s)) {
8480 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
8481 }
8482 return true;
8483}
8484
8485static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a)
8486{
8487 return do_sm4(s, a, gen_helper_crypto_sm4e);
8488}
3358eb3f
RH
8489
8490static bool trans_SM4EKEY(DisasContext *s, arg_rrr_esz *a)
8491{
8492 return do_sm4(s, a, gen_helper_crypto_sm4ekey);
8493}
8494
8495static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
8496{
8497 if (!dc_isar_feature(aa64_sve2_sha3, s)) {
8498 return false;
8499 }
8500 if (sve_access_check(s)) {
8501 gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
8502 }
8503 return true;
8504}
5c1b7226
RH
8505
8506static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
8507{
8508 if (!dc_isar_feature(aa64_sve2, s)) {
8509 return false;
8510 }
8511 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
8512}
8513
d29b17ca
RH
8514static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
8515{
8516 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8517 return false;
8518 }
8519 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
8520}
8521
5c1b7226
RH
8522static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
8523{
8524 if (!dc_isar_feature(aa64_sve2, s)) {
8525 return false;
8526 }
8527 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
8528}
83c2523f
SL
8529
8530static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
8531{
8532 if (!dc_isar_feature(aa64_sve2, s)) {
8533 return false;
8534 }
8535 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
8536}
8537
8538static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
8539{
8540 if (!dc_isar_feature(aa64_sve2, s)) {
8541 return false;
8542 }
8543 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
8544}
95365277
SL
8545
8546static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
8547{
8548 if (!dc_isar_feature(aa64_sve2, s)) {
8549 return false;
8550 }
8551 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
8552}
8553
8554static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
8555{
8556 if (!dc_isar_feature(aa64_sve2, s)) {
8557 return false;
8558 }
8559 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
8560}
631be02e
SL
8561
8562static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
8563{
8564 static gen_helper_gvec_3_ptr * const fns[] = {
8565 NULL, gen_helper_flogb_h,
8566 gen_helper_flogb_s, gen_helper_flogb_d
8567 };
8568
8569 if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
8570 return false;
8571 }
8572 if (sve_access_check(s)) {
8573 TCGv_ptr status =
8574 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8575 unsigned vsz = vec_full_reg_size(s);
8576
8577 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
8578 vec_full_reg_offset(s, a->rn),
8579 pred_full_reg_offset(s, a->pg),
8580 status, vsz, vsz, 0, fns[a->esz]);
8581 tcg_temp_free_ptr(status);
8582 }
8583 return true;
8584}
50d102bd
SL
8585
8586static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
8587{
8588 if (!dc_isar_feature(aa64_sve2, s)) {
8589 return false;
8590 }
8591 if (sve_access_check(s)) {
8592 unsigned vsz = vec_full_reg_size(s);
8593 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8594 vec_full_reg_offset(s, a->rn),
8595 vec_full_reg_offset(s, a->rm),
8596 vec_full_reg_offset(s, a->ra),
8597 cpu_env, vsz, vsz, (sel << 1) | sub,
8598 gen_helper_sve2_fmlal_zzzw_s);
8599 }
8600 return true;
8601}
8602
8603static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8604{
8605 return do_FMLAL_zzzw(s, a, false, false);
8606}
8607
8608static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8609{
8610 return do_FMLAL_zzzw(s, a, false, true);
8611}
8612
8613static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8614{
8615 return do_FMLAL_zzzw(s, a, true, false);
8616}
8617
8618static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8619{
8620 return do_FMLAL_zzzw(s, a, true, true);
8621}
8622
8623static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
8624{
8625 if (!dc_isar_feature(aa64_sve2, s)) {
8626 return false;
8627 }
8628 if (sve_access_check(s)) {
8629 unsigned vsz = vec_full_reg_size(s);
8630 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8631 vec_full_reg_offset(s, a->rn),
8632 vec_full_reg_offset(s, a->rm),
8633 vec_full_reg_offset(s, a->ra),
8634 cpu_env, vsz, vsz,
8635 (a->index << 2) | (sel << 1) | sub,
8636 gen_helper_sve2_fmlal_zzxw_s);
8637 }
8638 return true;
8639}
8640
8641static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8642{
8643 return do_FMLAL_zzxw(s, a, false, false);
8644}
8645
8646static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8647{
8648 return do_FMLAL_zzxw(s, a, false, true);
8649}
8650
8651static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8652{
8653 return do_FMLAL_zzxw(s, a, true, false);
8654}
8655
8656static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8657{
8658 return do_FMLAL_zzxw(s, a, true, true);
8659}
2323c5ff
RH
8660
8661static bool do_i8mm_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
8662 gen_helper_gvec_4 *fn, int data)
8663{
8664 if (!dc_isar_feature(aa64_sve_i8mm, s)) {
8665 return false;
8666 }
8667 if (sve_access_check(s)) {
8668 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
8669 }
8670 return true;
8671}
8672
8673static bool trans_SMMLA(DisasContext *s, arg_rrrr_esz *a)
8674{
8675 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_smmla_b, 0);
8676}
8677
8678static bool trans_USMMLA(DisasContext *s, arg_rrrr_esz *a)
8679{
8680 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_usmmla_b, 0);
8681}
8682
8683static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a)
8684{
8685 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0);
8686}
cb8657f7
RH
8687
8688static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
8689{
8690 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8691 return false;
8692 }
8693 if (sve_access_check(s)) {
8694 gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot,
8695 a->rd, a->rn, a->rm, a->ra, 0);
8696 }
8697 return true;
8698}
83914478
RH
8699
8700static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
8701{
8702 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8703 return false;
8704 }
8705 if (sve_access_check(s)) {
8706 gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx,
8707 a->rd, a->rn, a->rm, a->ra, a->index);
8708 }
8709 return true;
8710}
81266a1f
RH
8711
8712static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
8713{
8714 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8715 return false;
8716 }
8717 if (sve_access_check(s)) {
8718 gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla,
8719 a->rd, a->rn, a->rm, a->ra, 0);
8720 }
8721 return true;
8722}
5693887f
RH
8723
8724static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8725{
8726 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8727 return false;
8728 }
8729 if (sve_access_check(s)) {
8730 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8731 unsigned vsz = vec_full_reg_size(s);
8732
8733 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8734 vec_full_reg_offset(s, a->rn),
8735 vec_full_reg_offset(s, a->rm),
8736 vec_full_reg_offset(s, a->ra),
8737 status, vsz, vsz, sel,
8738 gen_helper_gvec_bfmlal);
8739 tcg_temp_free_ptr(status);
8740 }
8741 return true;
8742}
8743
8744static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8745{
8746 return do_BFMLAL_zzzw(s, a, false);
8747}
8748
8749static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8750{
8751 return do_BFMLAL_zzzw(s, a, true);
8752}
458d0ab6
RH
8753
8754static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
8755{
8756 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8757 return false;
8758 }
8759 if (sve_access_check(s)) {
8760 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8761 unsigned vsz = vec_full_reg_size(s);
8762
8763 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8764 vec_full_reg_offset(s, a->rn),
8765 vec_full_reg_offset(s, a->rm),
8766 vec_full_reg_offset(s, a->ra),
8767 status, vsz, vsz, (a->index << 1) | sel,
8768 gen_helper_gvec_bfmlal_idx);
8769 tcg_temp_free_ptr(status);
8770 }
8771 return true;
8772}
8773
8774static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8775{
8776 return do_BFMLAL_zzxw(s, a, false);
8777}
8778
8779static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8780{
8781 return do_BFMLAL_zzxw(s, a, true);
8782}