]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
Merge tag 'pull-request-2022-03-15v2' of https://gitlab.com/thuth/qemu into staging
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
49/*
50 * Helpers for extracting complex instruction fields.
51 */
52
53/* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
451e4ffd 56static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
57{
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60}
61
451e4ffd 62static int tszimm_shr(DisasContext *s, int x)
ccd841c3 63{
451e4ffd 64 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
65}
66
67/* See e.g. LSL (immediate, predicated). */
451e4ffd 68static int tszimm_shl(DisasContext *s, int x)
ccd841c3 69{
451e4ffd 70 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
71}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
c4e7c493
RH
84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
86 */
451e4ffd 87static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
88{
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
91}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
103/* Return the offset info CPUARMState of the predicate vector register Pn.
104 * Note for this purpose, FFR is P16.
105 */
106static inline int pred_full_reg_offset(DisasContext *s, int regno)
107{
108 return offsetof(CPUARMState, vfp.pregs[regno]);
109}
110
111/* Return the byte size of the whole predicate register, VL / 64. */
112static inline int pred_full_reg_size(DisasContext *s)
113{
114 return s->sve_len >> 3;
115}
116
516e246a
RH
117/* Round up the size of a register to a size allowed by
118 * the tcg vector infrastructure. Any operation which uses this
119 * size may assume that the bits above pred_full_reg_size are zero,
120 * and must leave them the same way.
121 *
122 * Note that this is not needed for the vector registers as they
123 * are always properly sized for tcg vectors.
124 */
125static int size_for_gvec(int size)
126{
127 if (size <= 8) {
128 return 8;
129 } else {
130 return QEMU_ALIGN_UP(size, 16);
131 }
132}
133
134static int pred_gvec_reg_size(DisasContext *s)
135{
136 return size_for_gvec(pred_full_reg_size(s));
137}
138
40e32e5a
RH
139/* Invoke an out-of-line helper on 2 Zregs. */
140static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
141 int rd, int rn, int data)
142{
143 unsigned vsz = vec_full_reg_size(s);
144 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
145 vec_full_reg_offset(s, rn),
146 vsz, vsz, data, fn);
147}
148
e645d1a1
RH
149/* Invoke an out-of-line helper on 3 Zregs. */
150static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
151 int rd, int rn, int rm, int data)
152{
153 unsigned vsz = vec_full_reg_size(s);
154 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
155 vec_full_reg_offset(s, rn),
156 vec_full_reg_offset(s, rm),
157 vsz, vsz, data, fn);
158}
159
38650638
RH
160/* Invoke an out-of-line helper on 4 Zregs. */
161static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
162 int rd, int rn, int rm, int ra, int data)
163{
164 unsigned vsz = vec_full_reg_size(s);
165 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
166 vec_full_reg_offset(s, rn),
167 vec_full_reg_offset(s, rm),
168 vec_full_reg_offset(s, ra),
169 vsz, vsz, data, fn);
170}
171
96a461f7
RH
172/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
173static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
174 int rd, int rn, int pg, int data)
175{
176 unsigned vsz = vec_full_reg_size(s);
177 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
178 vec_full_reg_offset(s, rn),
179 pred_full_reg_offset(s, pg),
180 vsz, vsz, data, fn);
181}
182
36cbb7a8
RH
183/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
184static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
185 int rd, int rn, int rm, int pg, int data)
186{
187 unsigned vsz = vec_full_reg_size(s);
188 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
189 vec_full_reg_offset(s, rn),
190 vec_full_reg_offset(s, rm),
191 pred_full_reg_offset(s, pg),
192 vsz, vsz, data, fn);
193}
f7d79c41 194
36cbb7a8 195/* Invoke a vector expander on two Zregs. */
f7d79c41
RH
196static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
197 int esz, int rd, int rn)
38388f7e 198{
f7d79c41
RH
199 unsigned vsz = vec_full_reg_size(s);
200 gvec_fn(esz, vec_full_reg_offset(s, rd),
201 vec_full_reg_offset(s, rn), vsz, vsz);
38388f7e
RH
202}
203
39eea561 204/* Invoke a vector expander on three Zregs. */
28c4da31
RH
205static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
206 int esz, int rd, int rn, int rm)
38388f7e 207{
28c4da31
RH
208 unsigned vsz = vec_full_reg_size(s);
209 gvec_fn(esz, vec_full_reg_offset(s, rd),
210 vec_full_reg_offset(s, rn),
211 vec_full_reg_offset(s, rm), vsz, vsz);
38388f7e
RH
212}
213
911cdc6d
RH
214/* Invoke a vector expander on four Zregs. */
215static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
216 int esz, int rd, int rn, int rm, int ra)
217{
218 unsigned vsz = vec_full_reg_size(s);
219 gvec_fn(esz, vec_full_reg_offset(s, rd),
220 vec_full_reg_offset(s, rn),
221 vec_full_reg_offset(s, rm),
222 vec_full_reg_offset(s, ra), vsz, vsz);
223}
224
39eea561
RH
225/* Invoke a vector move on two Zregs. */
226static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 227{
f7d79c41
RH
228 if (sve_access_check(s)) {
229 gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
230 }
231 return true;
38388f7e
RH
232}
233
d9d78dcc
RH
234/* Initialize a Zreg with replications of a 64-bit immediate. */
235static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
236{
237 unsigned vsz = vec_full_reg_size(s);
8711e71f 238 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
d9d78dcc
RH
239}
240
516e246a 241/* Invoke a vector expander on three Pregs. */
dd81a8d7
RH
242static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
243 int rd, int rn, int rm)
516e246a 244{
dd81a8d7
RH
245 unsigned psz = pred_gvec_reg_size(s);
246 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
247 pred_full_reg_offset(s, rn),
248 pred_full_reg_offset(s, rm), psz, psz);
516e246a
RH
249}
250
251/* Invoke a vector move on two Pregs. */
252static bool do_mov_p(DisasContext *s, int rd, int rn)
253{
d0b2df5a
RH
254 if (sve_access_check(s)) {
255 unsigned psz = pred_gvec_reg_size(s);
256 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
257 pred_full_reg_offset(s, rn), psz, psz);
258 }
259 return true;
516e246a
RH
260}
261
9e18d7a6
RH
262/* Set the cpu flags as per a return from an SVE helper. */
263static void do_pred_flags(TCGv_i32 t)
264{
265 tcg_gen_mov_i32(cpu_NF, t);
266 tcg_gen_andi_i32(cpu_ZF, t, 2);
267 tcg_gen_andi_i32(cpu_CF, t, 1);
268 tcg_gen_movi_i32(cpu_VF, 0);
269}
270
271/* Subroutines computing the ARM PredTest psuedofunction. */
272static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
273{
274 TCGv_i32 t = tcg_temp_new_i32();
275
276 gen_helper_sve_predtest1(t, d, g);
277 do_pred_flags(t);
278 tcg_temp_free_i32(t);
279}
280
281static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
282{
283 TCGv_ptr dptr = tcg_temp_new_ptr();
284 TCGv_ptr gptr = tcg_temp_new_ptr();
285 TCGv_i32 t;
286
287 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
288 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
289 t = tcg_const_i32(words);
290
291 gen_helper_sve_predtest(t, dptr, gptr, t);
292 tcg_temp_free_ptr(dptr);
293 tcg_temp_free_ptr(gptr);
294
295 do_pred_flags(t);
296 tcg_temp_free_i32(t);
297}
298
028e2a7b
RH
299/* For each element size, the bits within a predicate word that are active. */
300const uint64_t pred_esz_masks[4] = {
301 0xffffffffffffffffull, 0x5555555555555555ull,
302 0x1111111111111111ull, 0x0101010101010101ull
303};
304
39eea561
RH
305/*
306 *** SVE Logical - Unpredicated Group
307 */
308
28c4da31
RH
309static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
310{
311 if (sve_access_check(s)) {
312 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
313 }
314 return true;
315}
316
3a7be554 317static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 318{
28c4da31 319 return do_zzz_fn(s, a, tcg_gen_gvec_and);
39eea561
RH
320}
321
3a7be554 322static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 323{
28c4da31 324 return do_zzz_fn(s, a, tcg_gen_gvec_or);
39eea561
RH
325}
326
3a7be554 327static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 328{
28c4da31 329 return do_zzz_fn(s, a, tcg_gen_gvec_xor);
39eea561
RH
330}
331
3a7be554 332static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
38388f7e 333{
28c4da31 334 return do_zzz_fn(s, a, tcg_gen_gvec_andc);
38388f7e 335}
d1822297 336
e6eba6e5
RH
337static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
338{
339 TCGv_i64 t = tcg_temp_new_i64();
340 uint64_t mask = dup_const(MO_8, 0xff >> sh);
341
342 tcg_gen_xor_i64(t, n, m);
343 tcg_gen_shri_i64(d, t, sh);
344 tcg_gen_shli_i64(t, t, 8 - sh);
345 tcg_gen_andi_i64(d, d, mask);
346 tcg_gen_andi_i64(t, t, ~mask);
347 tcg_gen_or_i64(d, d, t);
348 tcg_temp_free_i64(t);
349}
350
351static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
352{
353 TCGv_i64 t = tcg_temp_new_i64();
354 uint64_t mask = dup_const(MO_16, 0xffff >> sh);
355
356 tcg_gen_xor_i64(t, n, m);
357 tcg_gen_shri_i64(d, t, sh);
358 tcg_gen_shli_i64(t, t, 16 - sh);
359 tcg_gen_andi_i64(d, d, mask);
360 tcg_gen_andi_i64(t, t, ~mask);
361 tcg_gen_or_i64(d, d, t);
362 tcg_temp_free_i64(t);
363}
364
365static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
366{
367 tcg_gen_xor_i32(d, n, m);
368 tcg_gen_rotri_i32(d, d, sh);
369}
370
371static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
372{
373 tcg_gen_xor_i64(d, n, m);
374 tcg_gen_rotri_i64(d, d, sh);
375}
376
377static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
378 TCGv_vec m, int64_t sh)
379{
380 tcg_gen_xor_vec(vece, d, n, m);
381 tcg_gen_rotri_vec(vece, d, d, sh);
382}
383
384void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
385 uint32_t rm_ofs, int64_t shift,
386 uint32_t opr_sz, uint32_t max_sz)
387{
388 static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
389 static const GVecGen3i ops[4] = {
390 { .fni8 = gen_xar8_i64,
391 .fniv = gen_xar_vec,
392 .fno = gen_helper_sve2_xar_b,
393 .opt_opc = vecop,
394 .vece = MO_8 },
395 { .fni8 = gen_xar16_i64,
396 .fniv = gen_xar_vec,
397 .fno = gen_helper_sve2_xar_h,
398 .opt_opc = vecop,
399 .vece = MO_16 },
400 { .fni4 = gen_xar_i32,
401 .fniv = gen_xar_vec,
402 .fno = gen_helper_sve2_xar_s,
403 .opt_opc = vecop,
404 .vece = MO_32 },
405 { .fni8 = gen_xar_i64,
406 .fniv = gen_xar_vec,
407 .fno = gen_helper_gvec_xar_d,
408 .opt_opc = vecop,
409 .vece = MO_64 }
410 };
411 int esize = 8 << vece;
412
413 /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
414 tcg_debug_assert(shift >= 0);
415 tcg_debug_assert(shift <= esize);
416 shift &= esize - 1;
417
418 if (shift == 0) {
419 /* xar with no rotate devolves to xor. */
420 tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
421 } else {
422 tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
423 shift, &ops[vece]);
424 }
425}
426
427static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
428{
429 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
430 return false;
431 }
432 if (sve_access_check(s)) {
433 unsigned vsz = vec_full_reg_size(s);
434 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
435 vec_full_reg_offset(s, a->rn),
436 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
437 }
438 return true;
439}
440
911cdc6d
RH
441static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
442{
443 if (!dc_isar_feature(aa64_sve2, s)) {
444 return false;
445 }
446 if (sve_access_check(s)) {
447 gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
448 }
449 return true;
450}
451
452static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
453{
454 tcg_gen_xor_i64(d, n, m);
455 tcg_gen_xor_i64(d, d, k);
456}
457
458static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
459 TCGv_vec m, TCGv_vec k)
460{
461 tcg_gen_xor_vec(vece, d, n, m);
462 tcg_gen_xor_vec(vece, d, d, k);
463}
464
465static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
466 uint32_t a, uint32_t oprsz, uint32_t maxsz)
467{
468 static const GVecGen4 op = {
469 .fni8 = gen_eor3_i64,
470 .fniv = gen_eor3_vec,
471 .fno = gen_helper_sve2_eor3,
472 .vece = MO_64,
473 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
474 };
475 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
476}
477
478static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
479{
480 return do_sve2_zzzz_fn(s, a, gen_eor3);
481}
482
483static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
484{
485 tcg_gen_andc_i64(d, m, k);
486 tcg_gen_xor_i64(d, d, n);
487}
488
489static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
490 TCGv_vec m, TCGv_vec k)
491{
492 tcg_gen_andc_vec(vece, d, m, k);
493 tcg_gen_xor_vec(vece, d, d, n);
494}
495
496static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
497 uint32_t a, uint32_t oprsz, uint32_t maxsz)
498{
499 static const GVecGen4 op = {
500 .fni8 = gen_bcax_i64,
501 .fniv = gen_bcax_vec,
502 .fno = gen_helper_sve2_bcax,
503 .vece = MO_64,
504 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
505 };
506 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
507}
508
509static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
510{
511 return do_sve2_zzzz_fn(s, a, gen_bcax);
512}
513
514static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
515 uint32_t a, uint32_t oprsz, uint32_t maxsz)
516{
517 /* BSL differs from the generic bitsel in argument ordering. */
518 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
519}
520
521static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
522{
523 return do_sve2_zzzz_fn(s, a, gen_bsl);
524}
525
526static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
527{
528 tcg_gen_andc_i64(n, k, n);
529 tcg_gen_andc_i64(m, m, k);
530 tcg_gen_or_i64(d, n, m);
531}
532
533static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
534 TCGv_vec m, TCGv_vec k)
535{
536 if (TCG_TARGET_HAS_bitsel_vec) {
537 tcg_gen_not_vec(vece, n, n);
538 tcg_gen_bitsel_vec(vece, d, k, n, m);
539 } else {
540 tcg_gen_andc_vec(vece, n, k, n);
541 tcg_gen_andc_vec(vece, m, m, k);
542 tcg_gen_or_vec(vece, d, n, m);
543 }
544}
545
546static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
547 uint32_t a, uint32_t oprsz, uint32_t maxsz)
548{
549 static const GVecGen4 op = {
550 .fni8 = gen_bsl1n_i64,
551 .fniv = gen_bsl1n_vec,
552 .fno = gen_helper_sve2_bsl1n,
553 .vece = MO_64,
554 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
555 };
556 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
557}
558
559static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
560{
561 return do_sve2_zzzz_fn(s, a, gen_bsl1n);
562}
563
564static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
565{
566 /*
567 * Z[dn] = (n & k) | (~m & ~k)
568 * = | ~(m | k)
569 */
570 tcg_gen_and_i64(n, n, k);
571 if (TCG_TARGET_HAS_orc_i64) {
572 tcg_gen_or_i64(m, m, k);
573 tcg_gen_orc_i64(d, n, m);
574 } else {
575 tcg_gen_nor_i64(m, m, k);
576 tcg_gen_or_i64(d, n, m);
577 }
578}
579
580static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
581 TCGv_vec m, TCGv_vec k)
582{
583 if (TCG_TARGET_HAS_bitsel_vec) {
584 tcg_gen_not_vec(vece, m, m);
585 tcg_gen_bitsel_vec(vece, d, k, n, m);
586 } else {
587 tcg_gen_and_vec(vece, n, n, k);
588 tcg_gen_or_vec(vece, m, m, k);
589 tcg_gen_orc_vec(vece, d, n, m);
590 }
591}
592
593static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
594 uint32_t a, uint32_t oprsz, uint32_t maxsz)
595{
596 static const GVecGen4 op = {
597 .fni8 = gen_bsl2n_i64,
598 .fniv = gen_bsl2n_vec,
599 .fno = gen_helper_sve2_bsl2n,
600 .vece = MO_64,
601 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
602 };
603 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
604}
605
606static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
607{
608 return do_sve2_zzzz_fn(s, a, gen_bsl2n);
609}
610
611static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
612{
613 tcg_gen_and_i64(n, n, k);
614 tcg_gen_andc_i64(m, m, k);
615 tcg_gen_nor_i64(d, n, m);
616}
617
618static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
619 TCGv_vec m, TCGv_vec k)
620{
621 tcg_gen_bitsel_vec(vece, d, k, n, m);
622 tcg_gen_not_vec(vece, d, d);
623}
624
625static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
626 uint32_t a, uint32_t oprsz, uint32_t maxsz)
627{
628 static const GVecGen4 op = {
629 .fni8 = gen_nbsl_i64,
630 .fniv = gen_nbsl_vec,
631 .fno = gen_helper_sve2_nbsl,
632 .vece = MO_64,
633 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
634 };
635 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
636}
637
638static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
639{
640 return do_sve2_zzzz_fn(s, a, gen_nbsl);
641}
642
fea98f9c
RH
643/*
644 *** SVE Integer Arithmetic - Unpredicated Group
645 */
646
3a7be554 647static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 648{
28c4da31 649 return do_zzz_fn(s, a, tcg_gen_gvec_add);
fea98f9c
RH
650}
651
3a7be554 652static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 653{
28c4da31 654 return do_zzz_fn(s, a, tcg_gen_gvec_sub);
fea98f9c
RH
655}
656
3a7be554 657static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 658{
28c4da31 659 return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
fea98f9c
RH
660}
661
3a7be554 662static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 663{
28c4da31 664 return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
fea98f9c
RH
665}
666
3a7be554 667static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 668{
28c4da31 669 return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
fea98f9c
RH
670}
671
3a7be554 672static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 673{
28c4da31 674 return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
fea98f9c
RH
675}
676
f97cfd59
RH
677/*
678 *** SVE Integer Arithmetic - Binary Predicated Group
679 */
680
681static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
682{
f97cfd59
RH
683 if (fn == NULL) {
684 return false;
685 }
686 if (sve_access_check(s)) {
36cbb7a8 687 gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
f97cfd59
RH
688 }
689 return true;
690}
691
a2103582
RH
692/* Select active elememnts from Zn and inactive elements from Zm,
693 * storing the result in Zd.
694 */
695static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
696{
697 static gen_helper_gvec_4 * const fns[4] = {
698 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
699 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
700 };
36cbb7a8 701 gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
a2103582
RH
702}
703
f97cfd59 704#define DO_ZPZZ(NAME, name) \
3a7be554 705static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
f97cfd59
RH
706{ \
707 static gen_helper_gvec_4 * const fns[4] = { \
708 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
709 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
710 }; \
711 return do_zpzz_ool(s, a, fns[a->esz]); \
712}
713
714DO_ZPZZ(AND, and)
715DO_ZPZZ(EOR, eor)
716DO_ZPZZ(ORR, orr)
717DO_ZPZZ(BIC, bic)
718
719DO_ZPZZ(ADD, add)
720DO_ZPZZ(SUB, sub)
721
722DO_ZPZZ(SMAX, smax)
723DO_ZPZZ(UMAX, umax)
724DO_ZPZZ(SMIN, smin)
725DO_ZPZZ(UMIN, umin)
726DO_ZPZZ(SABD, sabd)
727DO_ZPZZ(UABD, uabd)
728
729DO_ZPZZ(MUL, mul)
730DO_ZPZZ(SMULH, smulh)
731DO_ZPZZ(UMULH, umulh)
732
27721dbb
RH
733DO_ZPZZ(ASR, asr)
734DO_ZPZZ(LSR, lsr)
735DO_ZPZZ(LSL, lsl)
736
3a7be554 737static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
f97cfd59
RH
738{
739 static gen_helper_gvec_4 * const fns[4] = {
740 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
741 };
742 return do_zpzz_ool(s, a, fns[a->esz]);
743}
744
3a7be554 745static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
f97cfd59
RH
746{
747 static gen_helper_gvec_4 * const fns[4] = {
748 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
749 };
750 return do_zpzz_ool(s, a, fns[a->esz]);
751}
752
3a7be554 753static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
a2103582
RH
754{
755 if (sve_access_check(s)) {
756 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
757 }
758 return true;
759}
d3fe4a29 760
f97cfd59
RH
761#undef DO_ZPZZ
762
afac6d04
RH
763/*
764 *** SVE Integer Arithmetic - Unary Predicated Group
765 */
766
767static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
768{
769 if (fn == NULL) {
770 return false;
771 }
772 if (sve_access_check(s)) {
96a461f7 773 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
afac6d04
RH
774 }
775 return true;
776}
777
778#define DO_ZPZ(NAME, name) \
3a7be554 779static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
afac6d04
RH
780{ \
781 static gen_helper_gvec_3 * const fns[4] = { \
782 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
783 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
784 }; \
785 return do_zpz_ool(s, a, fns[a->esz]); \
786}
787
788DO_ZPZ(CLS, cls)
789DO_ZPZ(CLZ, clz)
790DO_ZPZ(CNT_zpz, cnt_zpz)
791DO_ZPZ(CNOT, cnot)
792DO_ZPZ(NOT_zpz, not_zpz)
793DO_ZPZ(ABS, abs)
794DO_ZPZ(NEG, neg)
795
3a7be554 796static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
797{
798 static gen_helper_gvec_3 * const fns[4] = {
799 NULL,
800 gen_helper_sve_fabs_h,
801 gen_helper_sve_fabs_s,
802 gen_helper_sve_fabs_d
803 };
804 return do_zpz_ool(s, a, fns[a->esz]);
805}
806
3a7be554 807static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
808{
809 static gen_helper_gvec_3 * const fns[4] = {
810 NULL,
811 gen_helper_sve_fneg_h,
812 gen_helper_sve_fneg_s,
813 gen_helper_sve_fneg_d
814 };
815 return do_zpz_ool(s, a, fns[a->esz]);
816}
817
3a7be554 818static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
819{
820 static gen_helper_gvec_3 * const fns[4] = {
821 NULL,
822 gen_helper_sve_sxtb_h,
823 gen_helper_sve_sxtb_s,
824 gen_helper_sve_sxtb_d
825 };
826 return do_zpz_ool(s, a, fns[a->esz]);
827}
828
3a7be554 829static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
830{
831 static gen_helper_gvec_3 * const fns[4] = {
832 NULL,
833 gen_helper_sve_uxtb_h,
834 gen_helper_sve_uxtb_s,
835 gen_helper_sve_uxtb_d
836 };
837 return do_zpz_ool(s, a, fns[a->esz]);
838}
839
3a7be554 840static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
841{
842 static gen_helper_gvec_3 * const fns[4] = {
843 NULL, NULL,
844 gen_helper_sve_sxth_s,
845 gen_helper_sve_sxth_d
846 };
847 return do_zpz_ool(s, a, fns[a->esz]);
848}
849
3a7be554 850static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
851{
852 static gen_helper_gvec_3 * const fns[4] = {
853 NULL, NULL,
854 gen_helper_sve_uxth_s,
855 gen_helper_sve_uxth_d
856 };
857 return do_zpz_ool(s, a, fns[a->esz]);
858}
859
3a7be554 860static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
861{
862 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
863}
864
3a7be554 865static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
866{
867 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
868}
869
870#undef DO_ZPZ
871
047cec97
RH
872/*
873 *** SVE Integer Reduction Group
874 */
875
876typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
877static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
878 gen_helper_gvec_reduc *fn)
879{
880 unsigned vsz = vec_full_reg_size(s);
881 TCGv_ptr t_zn, t_pg;
882 TCGv_i32 desc;
883 TCGv_i64 temp;
884
885 if (fn == NULL) {
886 return false;
887 }
888 if (!sve_access_check(s)) {
889 return true;
890 }
891
892 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
893 temp = tcg_temp_new_i64();
894 t_zn = tcg_temp_new_ptr();
895 t_pg = tcg_temp_new_ptr();
896
897 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
898 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
899 fn(temp, t_zn, t_pg, desc);
900 tcg_temp_free_ptr(t_zn);
901 tcg_temp_free_ptr(t_pg);
902 tcg_temp_free_i32(desc);
903
904 write_fp_dreg(s, a->rd, temp);
905 tcg_temp_free_i64(temp);
906 return true;
907}
908
909#define DO_VPZ(NAME, name) \
3a7be554 910static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
047cec97
RH
911{ \
912 static gen_helper_gvec_reduc * const fns[4] = { \
913 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
914 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
915 }; \
916 return do_vpz_ool(s, a, fns[a->esz]); \
917}
918
919DO_VPZ(ORV, orv)
920DO_VPZ(ANDV, andv)
921DO_VPZ(EORV, eorv)
922
923DO_VPZ(UADDV, uaddv)
924DO_VPZ(SMAXV, smaxv)
925DO_VPZ(UMAXV, umaxv)
926DO_VPZ(SMINV, sminv)
927DO_VPZ(UMINV, uminv)
928
3a7be554 929static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
047cec97
RH
930{
931 static gen_helper_gvec_reduc * const fns[4] = {
932 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
933 gen_helper_sve_saddv_s, NULL
934 };
935 return do_vpz_ool(s, a, fns[a->esz]);
936}
937
938#undef DO_VPZ
939
ccd841c3
RH
940/*
941 *** SVE Shift by Immediate - Predicated Group
942 */
943
60245996
RH
944/*
945 * Copy Zn into Zd, storing zeros into inactive elements.
946 * If invert, store zeros into the active elements.
ccd841c3 947 */
60245996
RH
948static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
949 int esz, bool invert)
ccd841c3 950{
60245996
RH
951 static gen_helper_gvec_3 * const fns[4] = {
952 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
953 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
ccd841c3 954 };
60245996 955
ccd841c3 956 if (sve_access_check(s)) {
96a461f7 957 gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
ccd841c3
RH
958 }
959 return true;
960}
961
962static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
963 gen_helper_gvec_3 *fn)
964{
965 if (sve_access_check(s)) {
96a461f7 966 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
ccd841c3
RH
967 }
968 return true;
969}
970
3a7be554 971static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
972{
973 static gen_helper_gvec_3 * const fns[4] = {
974 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
975 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
976 };
977 if (a->esz < 0) {
978 /* Invalid tsz encoding -- see tszimm_esz. */
979 return false;
980 }
981 /* Shift by element size is architecturally valid. For
982 arithmetic right-shift, it's the same as by one less. */
983 a->imm = MIN(a->imm, (8 << a->esz) - 1);
984 return do_zpzi_ool(s, a, fns[a->esz]);
985}
986
3a7be554 987static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
988{
989 static gen_helper_gvec_3 * const fns[4] = {
990 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
991 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
992 };
993 if (a->esz < 0) {
994 return false;
995 }
996 /* Shift by element size is architecturally valid.
997 For logical shifts, it is a zeroing operation. */
998 if (a->imm >= (8 << a->esz)) {
60245996 999 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
1000 } else {
1001 return do_zpzi_ool(s, a, fns[a->esz]);
1002 }
1003}
1004
3a7be554 1005static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
1006{
1007 static gen_helper_gvec_3 * const fns[4] = {
1008 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
1009 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
1010 };
1011 if (a->esz < 0) {
1012 return false;
1013 }
1014 /* Shift by element size is architecturally valid.
1015 For logical shifts, it is a zeroing operation. */
1016 if (a->imm >= (8 << a->esz)) {
60245996 1017 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
1018 } else {
1019 return do_zpzi_ool(s, a, fns[a->esz]);
1020 }
1021}
1022
3a7be554 1023static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
1024{
1025 static gen_helper_gvec_3 * const fns[4] = {
1026 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
1027 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
1028 };
1029 if (a->esz < 0) {
1030 return false;
1031 }
1032 /* Shift by element size is architecturally valid. For arithmetic
1033 right shift for division, it is a zeroing operation. */
1034 if (a->imm >= (8 << a->esz)) {
60245996 1035 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
1036 } else {
1037 return do_zpzi_ool(s, a, fns[a->esz]);
1038 }
1039}
1040
a5421b54
SL
1041static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1042{
1043 static gen_helper_gvec_3 * const fns[4] = {
1044 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
1045 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
1046 };
1047 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1048 return false;
1049 }
1050 return do_zpzi_ool(s, a, fns[a->esz]);
1051}
1052
1053static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1054{
1055 static gen_helper_gvec_3 * const fns[4] = {
1056 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
1057 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
1058 };
1059 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1060 return false;
1061 }
1062 return do_zpzi_ool(s, a, fns[a->esz]);
1063}
1064
1065static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a)
1066{
1067 static gen_helper_gvec_3 * const fns[4] = {
1068 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
1069 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
1070 };
1071 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1072 return false;
1073 }
1074 return do_zpzi_ool(s, a, fns[a->esz]);
1075}
1076
1077static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a)
1078{
1079 static gen_helper_gvec_3 * const fns[4] = {
1080 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
1081 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
1082 };
1083 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1084 return false;
1085 }
1086 return do_zpzi_ool(s, a, fns[a->esz]);
1087}
1088
1089static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a)
1090{
1091 static gen_helper_gvec_3 * const fns[4] = {
1092 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
1093 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
1094 };
1095 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1096 return false;
1097 }
1098 return do_zpzi_ool(s, a, fns[a->esz]);
1099}
1100
fe7f8dfb
RH
1101/*
1102 *** SVE Bitwise Shift - Predicated Group
1103 */
1104
1105#define DO_ZPZW(NAME, name) \
3a7be554 1106static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
fe7f8dfb
RH
1107{ \
1108 static gen_helper_gvec_4 * const fns[3] = { \
1109 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
1110 gen_helper_sve_##name##_zpzw_s, \
1111 }; \
1112 if (a->esz < 0 || a->esz >= 3) { \
1113 return false; \
1114 } \
1115 return do_zpzz_ool(s, a, fns[a->esz]); \
1116}
1117
1118DO_ZPZW(ASR, asr)
1119DO_ZPZW(LSR, lsr)
1120DO_ZPZW(LSL, lsl)
1121
1122#undef DO_ZPZW
1123
d9d78dcc
RH
1124/*
1125 *** SVE Bitwise Shift - Unpredicated Group
1126 */
1127
1128static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
1129 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
1130 int64_t, uint32_t, uint32_t))
1131{
1132 if (a->esz < 0) {
1133 /* Invalid tsz encoding -- see tszimm_esz. */
1134 return false;
1135 }
1136 if (sve_access_check(s)) {
1137 unsigned vsz = vec_full_reg_size(s);
1138 /* Shift by element size is architecturally valid. For
1139 arithmetic right-shift, it's the same as by one less.
1140 Otherwise it is a zeroing operation. */
1141 if (a->imm >= 8 << a->esz) {
1142 if (asr) {
1143 a->imm = (8 << a->esz) - 1;
1144 } else {
1145 do_dupi_z(s, a->rd, 0);
1146 return true;
1147 }
1148 }
1149 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
1150 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
1151 }
1152 return true;
1153}
1154
3a7be554 1155static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
1156{
1157 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
1158}
1159
3a7be554 1160static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
1161{
1162 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
1163}
1164
3a7be554 1165static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
1166{
1167 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
1168}
1169
1170static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
1171{
1172 if (fn == NULL) {
1173 return false;
1174 }
1175 if (sve_access_check(s)) {
e645d1a1 1176 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
d9d78dcc
RH
1177 }
1178 return true;
1179}
1180
1181#define DO_ZZW(NAME, name) \
3a7be554 1182static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
d9d78dcc
RH
1183{ \
1184 static gen_helper_gvec_3 * const fns[4] = { \
1185 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
1186 gen_helper_sve_##name##_zzw_s, NULL \
1187 }; \
1188 return do_zzw_ool(s, a, fns[a->esz]); \
1189}
1190
1191DO_ZZW(ASR, asr)
1192DO_ZZW(LSR, lsr)
1193DO_ZZW(LSL, lsl)
1194
1195#undef DO_ZZW
1196
96a36e4a
RH
1197/*
1198 *** SVE Integer Multiply-Add Group
1199 */
1200
1201static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1202 gen_helper_gvec_5 *fn)
1203{
1204 if (sve_access_check(s)) {
1205 unsigned vsz = vec_full_reg_size(s);
1206 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1207 vec_full_reg_offset(s, a->ra),
1208 vec_full_reg_offset(s, a->rn),
1209 vec_full_reg_offset(s, a->rm),
1210 pred_full_reg_offset(s, a->pg),
1211 vsz, vsz, 0, fn);
1212 }
1213 return true;
1214}
1215
1216#define DO_ZPZZZ(NAME, name) \
3a7be554 1217static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
96a36e4a
RH
1218{ \
1219 static gen_helper_gvec_5 * const fns[4] = { \
1220 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
1221 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
1222 }; \
1223 return do_zpzzz_ool(s, a, fns[a->esz]); \
1224}
1225
1226DO_ZPZZZ(MLA, mla)
1227DO_ZPZZZ(MLS, mls)
1228
1229#undef DO_ZPZZZ
1230
9a56c9c3
RH
1231/*
1232 *** SVE Index Generation Group
1233 */
1234
1235static void do_index(DisasContext *s, int esz, int rd,
1236 TCGv_i64 start, TCGv_i64 incr)
1237{
1238 unsigned vsz = vec_full_reg_size(s);
1239 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1240 TCGv_ptr t_zd = tcg_temp_new_ptr();
1241
1242 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1243 if (esz == 3) {
1244 gen_helper_sve_index_d(t_zd, start, incr, desc);
1245 } else {
1246 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
1247 static index_fn * const fns[3] = {
1248 gen_helper_sve_index_b,
1249 gen_helper_sve_index_h,
1250 gen_helper_sve_index_s,
1251 };
1252 TCGv_i32 s32 = tcg_temp_new_i32();
1253 TCGv_i32 i32 = tcg_temp_new_i32();
1254
1255 tcg_gen_extrl_i64_i32(s32, start);
1256 tcg_gen_extrl_i64_i32(i32, incr);
1257 fns[esz](t_zd, s32, i32, desc);
1258
1259 tcg_temp_free_i32(s32);
1260 tcg_temp_free_i32(i32);
1261 }
1262 tcg_temp_free_ptr(t_zd);
1263 tcg_temp_free_i32(desc);
1264}
1265
3a7be554 1266static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
9a56c9c3
RH
1267{
1268 if (sve_access_check(s)) {
1269 TCGv_i64 start = tcg_const_i64(a->imm1);
1270 TCGv_i64 incr = tcg_const_i64(a->imm2);
1271 do_index(s, a->esz, a->rd, start, incr);
1272 tcg_temp_free_i64(start);
1273 tcg_temp_free_i64(incr);
1274 }
1275 return true;
1276}
1277
3a7be554 1278static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
9a56c9c3
RH
1279{
1280 if (sve_access_check(s)) {
1281 TCGv_i64 start = tcg_const_i64(a->imm);
1282 TCGv_i64 incr = cpu_reg(s, a->rm);
1283 do_index(s, a->esz, a->rd, start, incr);
1284 tcg_temp_free_i64(start);
1285 }
1286 return true;
1287}
1288
3a7be554 1289static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
9a56c9c3
RH
1290{
1291 if (sve_access_check(s)) {
1292 TCGv_i64 start = cpu_reg(s, a->rn);
1293 TCGv_i64 incr = tcg_const_i64(a->imm);
1294 do_index(s, a->esz, a->rd, start, incr);
1295 tcg_temp_free_i64(incr);
1296 }
1297 return true;
1298}
1299
3a7be554 1300static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
9a56c9c3
RH
1301{
1302 if (sve_access_check(s)) {
1303 TCGv_i64 start = cpu_reg(s, a->rn);
1304 TCGv_i64 incr = cpu_reg(s, a->rm);
1305 do_index(s, a->esz, a->rd, start, incr);
1306 }
1307 return true;
1308}
1309
96f922cc
RH
1310/*
1311 *** SVE Stack Allocation Group
1312 */
1313
3a7be554 1314static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1315{
5de56742
AC
1316 if (sve_access_check(s)) {
1317 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1318 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1319 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1320 }
96f922cc
RH
1321 return true;
1322}
1323
3a7be554 1324static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1325{
5de56742
AC
1326 if (sve_access_check(s)) {
1327 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1328 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1329 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1330 }
96f922cc
RH
1331 return true;
1332}
1333
3a7be554 1334static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1335{
5de56742
AC
1336 if (sve_access_check(s)) {
1337 TCGv_i64 reg = cpu_reg(s, a->rd);
1338 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1339 }
96f922cc
RH
1340 return true;
1341}
1342
4b242d9c
RH
1343/*
1344 *** SVE Compute Vector Address Group
1345 */
1346
1347static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
1348{
1349 if (sve_access_check(s)) {
e645d1a1 1350 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
4b242d9c
RH
1351 }
1352 return true;
1353}
1354
3a7be554 1355static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1356{
1357 return do_adr(s, a, gen_helper_sve_adr_p32);
1358}
1359
3a7be554 1360static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1361{
1362 return do_adr(s, a, gen_helper_sve_adr_p64);
1363}
1364
3a7be554 1365static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1366{
1367 return do_adr(s, a, gen_helper_sve_adr_s32);
1368}
1369
3a7be554 1370static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1371{
1372 return do_adr(s, a, gen_helper_sve_adr_u32);
1373}
1374
0762cd42
RH
1375/*
1376 *** SVE Integer Misc - Unpredicated Group
1377 */
1378
3a7be554 1379static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
0762cd42
RH
1380{
1381 static gen_helper_gvec_2 * const fns[4] = {
1382 NULL,
1383 gen_helper_sve_fexpa_h,
1384 gen_helper_sve_fexpa_s,
1385 gen_helper_sve_fexpa_d,
1386 };
1387 if (a->esz == 0) {
1388 return false;
1389 }
1390 if (sve_access_check(s)) {
40e32e5a 1391 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
0762cd42
RH
1392 }
1393 return true;
1394}
1395
3a7be554 1396static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
a1f233f2
RH
1397{
1398 static gen_helper_gvec_3 * const fns[4] = {
1399 NULL,
1400 gen_helper_sve_ftssel_h,
1401 gen_helper_sve_ftssel_s,
1402 gen_helper_sve_ftssel_d,
1403 };
1404 if (a->esz == 0) {
1405 return false;
1406 }
1407 if (sve_access_check(s)) {
e645d1a1 1408 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
a1f233f2
RH
1409 }
1410 return true;
1411}
1412
516e246a
RH
1413/*
1414 *** SVE Predicate Logical Operations Group
1415 */
1416
1417static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1418 const GVecGen4 *gvec_op)
1419{
1420 if (!sve_access_check(s)) {
1421 return true;
1422 }
1423
1424 unsigned psz = pred_gvec_reg_size(s);
1425 int dofs = pred_full_reg_offset(s, a->rd);
1426 int nofs = pred_full_reg_offset(s, a->rn);
1427 int mofs = pred_full_reg_offset(s, a->rm);
1428 int gofs = pred_full_reg_offset(s, a->pg);
1429
dd81a8d7
RH
1430 if (!a->s) {
1431 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1432 return true;
1433 }
1434
516e246a
RH
1435 if (psz == 8) {
1436 /* Do the operation and the flags generation in temps. */
1437 TCGv_i64 pd = tcg_temp_new_i64();
1438 TCGv_i64 pn = tcg_temp_new_i64();
1439 TCGv_i64 pm = tcg_temp_new_i64();
1440 TCGv_i64 pg = tcg_temp_new_i64();
1441
1442 tcg_gen_ld_i64(pn, cpu_env, nofs);
1443 tcg_gen_ld_i64(pm, cpu_env, mofs);
1444 tcg_gen_ld_i64(pg, cpu_env, gofs);
1445
1446 gvec_op->fni8(pd, pn, pm, pg);
1447 tcg_gen_st_i64(pd, cpu_env, dofs);
1448
1449 do_predtest1(pd, pg);
1450
1451 tcg_temp_free_i64(pd);
1452 tcg_temp_free_i64(pn);
1453 tcg_temp_free_i64(pm);
1454 tcg_temp_free_i64(pg);
1455 } else {
1456 /* The operation and flags generation is large. The computation
1457 * of the flags depends on the original contents of the guarding
1458 * predicate. If the destination overwrites the guarding predicate,
1459 * then the easiest way to get this right is to save a copy.
1460 */
1461 int tofs = gofs;
1462 if (a->rd == a->pg) {
1463 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1464 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1465 }
1466
1467 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1468 do_predtest(s, dofs, tofs, psz / 8);
1469 }
1470 return true;
1471}
1472
1473static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1474{
1475 tcg_gen_and_i64(pd, pn, pm);
1476 tcg_gen_and_i64(pd, pd, pg);
1477}
1478
1479static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1480 TCGv_vec pm, TCGv_vec pg)
1481{
1482 tcg_gen_and_vec(vece, pd, pn, pm);
1483 tcg_gen_and_vec(vece, pd, pd, pg);
1484}
1485
3a7be554 1486static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1487{
1488 static const GVecGen4 op = {
1489 .fni8 = gen_and_pg_i64,
1490 .fniv = gen_and_pg_vec,
1491 .fno = gen_helper_sve_and_pppp,
1492 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1493 };
dd81a8d7
RH
1494
1495 if (!a->s) {
1496 if (!sve_access_check(s)) {
1497 return true;
1498 }
1499 if (a->rn == a->rm) {
1500 if (a->pg == a->rn) {
1501 do_mov_p(s, a->rd, a->rn);
1502 } else {
1503 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1504 }
1505 return true;
1506 } else if (a->pg == a->rn || a->pg == a->rm) {
1507 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1508 return true;
516e246a 1509 }
516e246a 1510 }
dd81a8d7 1511 return do_pppp_flags(s, a, &op);
516e246a
RH
1512}
1513
1514static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1515{
1516 tcg_gen_andc_i64(pd, pn, pm);
1517 tcg_gen_and_i64(pd, pd, pg);
1518}
1519
1520static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1521 TCGv_vec pm, TCGv_vec pg)
1522{
1523 tcg_gen_andc_vec(vece, pd, pn, pm);
1524 tcg_gen_and_vec(vece, pd, pd, pg);
1525}
1526
3a7be554 1527static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1528{
1529 static const GVecGen4 op = {
1530 .fni8 = gen_bic_pg_i64,
1531 .fniv = gen_bic_pg_vec,
1532 .fno = gen_helper_sve_bic_pppp,
1533 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1534 };
dd81a8d7
RH
1535
1536 if (!a->s && a->pg == a->rn) {
1537 if (sve_access_check(s)) {
1538 gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1539 }
1540 return true;
516e246a 1541 }
dd81a8d7 1542 return do_pppp_flags(s, a, &op);
516e246a
RH
1543}
1544
1545static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1546{
1547 tcg_gen_xor_i64(pd, pn, pm);
1548 tcg_gen_and_i64(pd, pd, pg);
1549}
1550
1551static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1552 TCGv_vec pm, TCGv_vec pg)
1553{
1554 tcg_gen_xor_vec(vece, pd, pn, pm);
1555 tcg_gen_and_vec(vece, pd, pd, pg);
1556}
1557
3a7be554 1558static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1559{
1560 static const GVecGen4 op = {
1561 .fni8 = gen_eor_pg_i64,
1562 .fniv = gen_eor_pg_vec,
1563 .fno = gen_helper_sve_eor_pppp,
1564 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1565 };
dd81a8d7 1566 return do_pppp_flags(s, a, &op);
516e246a
RH
1567}
1568
3a7be554 1569static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1570{
516e246a
RH
1571 if (a->s) {
1572 return false;
516e246a 1573 }
d4bc6232
RH
1574 if (sve_access_check(s)) {
1575 unsigned psz = pred_gvec_reg_size(s);
1576 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1577 pred_full_reg_offset(s, a->pg),
1578 pred_full_reg_offset(s, a->rn),
1579 pred_full_reg_offset(s, a->rm), psz, psz);
1580 }
1581 return true;
516e246a
RH
1582}
1583
1584static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1585{
1586 tcg_gen_or_i64(pd, pn, pm);
1587 tcg_gen_and_i64(pd, pd, pg);
1588}
1589
1590static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1591 TCGv_vec pm, TCGv_vec pg)
1592{
1593 tcg_gen_or_vec(vece, pd, pn, pm);
1594 tcg_gen_and_vec(vece, pd, pd, pg);
1595}
1596
3a7be554 1597static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1598{
1599 static const GVecGen4 op = {
1600 .fni8 = gen_orr_pg_i64,
1601 .fniv = gen_orr_pg_vec,
1602 .fno = gen_helper_sve_orr_pppp,
1603 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1604 };
dd81a8d7
RH
1605
1606 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
516e246a 1607 return do_mov_p(s, a->rd, a->rn);
516e246a 1608 }
dd81a8d7 1609 return do_pppp_flags(s, a, &op);
516e246a
RH
1610}
1611
1612static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1613{
1614 tcg_gen_orc_i64(pd, pn, pm);
1615 tcg_gen_and_i64(pd, pd, pg);
1616}
1617
1618static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1619 TCGv_vec pm, TCGv_vec pg)
1620{
1621 tcg_gen_orc_vec(vece, pd, pn, pm);
1622 tcg_gen_and_vec(vece, pd, pd, pg);
1623}
1624
3a7be554 1625static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1626{
1627 static const GVecGen4 op = {
1628 .fni8 = gen_orn_pg_i64,
1629 .fniv = gen_orn_pg_vec,
1630 .fno = gen_helper_sve_orn_pppp,
1631 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1632 };
dd81a8d7 1633 return do_pppp_flags(s, a, &op);
516e246a
RH
1634}
1635
1636static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1637{
1638 tcg_gen_or_i64(pd, pn, pm);
1639 tcg_gen_andc_i64(pd, pg, pd);
1640}
1641
1642static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1643 TCGv_vec pm, TCGv_vec pg)
1644{
1645 tcg_gen_or_vec(vece, pd, pn, pm);
1646 tcg_gen_andc_vec(vece, pd, pg, pd);
1647}
1648
3a7be554 1649static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1650{
1651 static const GVecGen4 op = {
1652 .fni8 = gen_nor_pg_i64,
1653 .fniv = gen_nor_pg_vec,
1654 .fno = gen_helper_sve_nor_pppp,
1655 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1656 };
dd81a8d7 1657 return do_pppp_flags(s, a, &op);
516e246a
RH
1658}
1659
1660static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1661{
1662 tcg_gen_and_i64(pd, pn, pm);
1663 tcg_gen_andc_i64(pd, pg, pd);
1664}
1665
1666static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1667 TCGv_vec pm, TCGv_vec pg)
1668{
1669 tcg_gen_and_vec(vece, pd, pn, pm);
1670 tcg_gen_andc_vec(vece, pd, pg, pd);
1671}
1672
3a7be554 1673static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1674{
1675 static const GVecGen4 op = {
1676 .fni8 = gen_nand_pg_i64,
1677 .fniv = gen_nand_pg_vec,
1678 .fno = gen_helper_sve_nand_pppp,
1679 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1680 };
dd81a8d7 1681 return do_pppp_flags(s, a, &op);
516e246a
RH
1682}
1683
9e18d7a6
RH
1684/*
1685 *** SVE Predicate Misc Group
1686 */
1687
3a7be554 1688static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
9e18d7a6
RH
1689{
1690 if (sve_access_check(s)) {
1691 int nofs = pred_full_reg_offset(s, a->rn);
1692 int gofs = pred_full_reg_offset(s, a->pg);
1693 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1694
1695 if (words == 1) {
1696 TCGv_i64 pn = tcg_temp_new_i64();
1697 TCGv_i64 pg = tcg_temp_new_i64();
1698
1699 tcg_gen_ld_i64(pn, cpu_env, nofs);
1700 tcg_gen_ld_i64(pg, cpu_env, gofs);
1701 do_predtest1(pn, pg);
1702
1703 tcg_temp_free_i64(pn);
1704 tcg_temp_free_i64(pg);
1705 } else {
1706 do_predtest(s, nofs, gofs, words);
1707 }
1708 }
1709 return true;
1710}
1711
028e2a7b
RH
1712/* See the ARM pseudocode DecodePredCount. */
1713static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1714{
1715 unsigned elements = fullsz >> esz;
1716 unsigned bound;
1717
1718 switch (pattern) {
1719 case 0x0: /* POW2 */
1720 return pow2floor(elements);
1721 case 0x1: /* VL1 */
1722 case 0x2: /* VL2 */
1723 case 0x3: /* VL3 */
1724 case 0x4: /* VL4 */
1725 case 0x5: /* VL5 */
1726 case 0x6: /* VL6 */
1727 case 0x7: /* VL7 */
1728 case 0x8: /* VL8 */
1729 bound = pattern;
1730 break;
1731 case 0x9: /* VL16 */
1732 case 0xa: /* VL32 */
1733 case 0xb: /* VL64 */
1734 case 0xc: /* VL128 */
1735 case 0xd: /* VL256 */
1736 bound = 16 << (pattern - 9);
1737 break;
1738 case 0x1d: /* MUL4 */
1739 return elements - elements % 4;
1740 case 0x1e: /* MUL3 */
1741 return elements - elements % 3;
1742 case 0x1f: /* ALL */
1743 return elements;
1744 default: /* #uimm5 */
1745 return 0;
1746 }
1747 return elements >= bound ? bound : 0;
1748}
1749
1750/* This handles all of the predicate initialization instructions,
1751 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1752 * so that decode_pred_count returns 0. For SETFFR, we will have
1753 * set RD == 16 == FFR.
1754 */
1755static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1756{
1757 if (!sve_access_check(s)) {
1758 return true;
1759 }
1760
1761 unsigned fullsz = vec_full_reg_size(s);
1762 unsigned ofs = pred_full_reg_offset(s, rd);
1763 unsigned numelem, setsz, i;
1764 uint64_t word, lastword;
1765 TCGv_i64 t;
1766
1767 numelem = decode_pred_count(fullsz, pat, esz);
1768
1769 /* Determine what we must store into each bit, and how many. */
1770 if (numelem == 0) {
1771 lastword = word = 0;
1772 setsz = fullsz;
1773 } else {
1774 setsz = numelem << esz;
1775 lastword = word = pred_esz_masks[esz];
1776 if (setsz % 64) {
973558a3 1777 lastword &= MAKE_64BIT_MASK(0, setsz % 64);
028e2a7b
RH
1778 }
1779 }
1780
1781 t = tcg_temp_new_i64();
1782 if (fullsz <= 64) {
1783 tcg_gen_movi_i64(t, lastword);
1784 tcg_gen_st_i64(t, cpu_env, ofs);
1785 goto done;
1786 }
1787
1788 if (word == lastword) {
1789 unsigned maxsz = size_for_gvec(fullsz / 8);
1790 unsigned oprsz = size_for_gvec(setsz / 8);
1791
1792 if (oprsz * 8 == setsz) {
8711e71f 1793 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
028e2a7b
RH
1794 goto done;
1795 }
028e2a7b
RH
1796 }
1797
1798 setsz /= 8;
1799 fullsz /= 8;
1800
1801 tcg_gen_movi_i64(t, word);
973558a3 1802 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
028e2a7b
RH
1803 tcg_gen_st_i64(t, cpu_env, ofs + i);
1804 }
1805 if (lastword != word) {
1806 tcg_gen_movi_i64(t, lastword);
1807 tcg_gen_st_i64(t, cpu_env, ofs + i);
1808 i += 8;
1809 }
1810 if (i < fullsz) {
1811 tcg_gen_movi_i64(t, 0);
1812 for (; i < fullsz; i += 8) {
1813 tcg_gen_st_i64(t, cpu_env, ofs + i);
1814 }
1815 }
1816
1817 done:
1818 tcg_temp_free_i64(t);
1819
1820 /* PTRUES */
1821 if (setflag) {
1822 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1823 tcg_gen_movi_i32(cpu_CF, word == 0);
1824 tcg_gen_movi_i32(cpu_VF, 0);
1825 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1826 }
1827 return true;
1828}
1829
3a7be554 1830static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
028e2a7b
RH
1831{
1832 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1833}
1834
3a7be554 1835static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
028e2a7b
RH
1836{
1837 /* Note pat == 31 is #all, to set all elements. */
1838 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1839}
1840
3a7be554 1841static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
028e2a7b
RH
1842{
1843 /* Note pat == 32 is #unimp, to set no elements. */
1844 return do_predset(s, 0, a->rd, 32, false);
1845}
1846
3a7be554 1847static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
028e2a7b
RH
1848{
1849 /* The path through do_pppp_flags is complicated enough to want to avoid
1850 * duplication. Frob the arguments into the form of a predicated AND.
1851 */
1852 arg_rprr_s alt_a = {
1853 .rd = a->rd, .pg = a->pg, .s = a->s,
1854 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1855 };
3a7be554 1856 return trans_AND_pppp(s, &alt_a);
028e2a7b
RH
1857}
1858
3a7be554 1859static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
028e2a7b
RH
1860{
1861 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1862}
1863
3a7be554 1864static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
028e2a7b
RH
1865{
1866 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1867}
1868
1869static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1870 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1871 TCGv_ptr, TCGv_i32))
1872{
1873 if (!sve_access_check(s)) {
1874 return true;
1875 }
1876
1877 TCGv_ptr t_pd = tcg_temp_new_ptr();
1878 TCGv_ptr t_pg = tcg_temp_new_ptr();
1879 TCGv_i32 t;
86300b5d 1880 unsigned desc = 0;
028e2a7b 1881
86300b5d
RH
1882 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
1883 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
028e2a7b
RH
1884
1885 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1886 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1887 t = tcg_const_i32(desc);
1888
1889 gen_fn(t, t_pd, t_pg, t);
1890 tcg_temp_free_ptr(t_pd);
1891 tcg_temp_free_ptr(t_pg);
1892
1893 do_pred_flags(t);
1894 tcg_temp_free_i32(t);
1895 return true;
1896}
1897
3a7be554 1898static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
028e2a7b
RH
1899{
1900 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1901}
1902
3a7be554 1903static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
028e2a7b
RH
1904{
1905 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1906}
1907
24e82e68
RH
1908/*
1909 *** SVE Element Count Group
1910 */
1911
1912/* Perform an inline saturating addition of a 32-bit value within
1913 * a 64-bit register. The second operand is known to be positive,
1914 * which halves the comparisions we must perform to bound the result.
1915 */
1916static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1917{
1918 int64_t ibound;
1919 TCGv_i64 bound;
1920 TCGCond cond;
1921
1922 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1923 if (u) {
1924 tcg_gen_ext32u_i64(reg, reg);
1925 } else {
1926 tcg_gen_ext32s_i64(reg, reg);
1927 }
1928 if (d) {
1929 tcg_gen_sub_i64(reg, reg, val);
1930 ibound = (u ? 0 : INT32_MIN);
1931 cond = TCG_COND_LT;
1932 } else {
1933 tcg_gen_add_i64(reg, reg, val);
1934 ibound = (u ? UINT32_MAX : INT32_MAX);
1935 cond = TCG_COND_GT;
1936 }
1937 bound = tcg_const_i64(ibound);
1938 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1939 tcg_temp_free_i64(bound);
1940}
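
/*
 * Illustrative sketch, not part of the translator: a plain-C model of the
 * clamping performed by do_sat_addsub_32.  Because the value is widened to
 * 64 bits first and VAL is positive, the true result always fits and a
 * single compare against one bound is enough.  The function name and the
 * use of the <stdint.h> limits are assumptions of this sketch.
 */
#include <stdint.h>
#include <stdbool.h>

static int64_t model_sat_addsub_32(int64_t reg, int64_t val, bool u, bool d)
{
    /* Mirror ext32u/ext32s: keep only the low 32 bits of the register. */
    int64_t r = u ? (int64_t)(uint32_t)reg : (int64_t)(int32_t)(uint32_t)reg;

    if (d) {
        r -= val;
        int64_t lo = u ? 0 : INT32_MIN;
        return r < lo ? lo : r;
    } else {
        r += val;
        int64_t hi = u ? (int64_t)UINT32_MAX : INT32_MAX;
        return r > hi ? hi : r;
    }
}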
1941
1942/* Similarly with 64-bit values. */
1943static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1944{
1945 TCGv_i64 t0 = tcg_temp_new_i64();
24e82e68
RH
1946 TCGv_i64 t2;
1947
1948 if (u) {
1949 if (d) {
1950 tcg_gen_sub_i64(t0, reg, val);
35a1ec8e
PMD
1951 t2 = tcg_constant_i64(0);
1952 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
24e82e68
RH
1953 } else {
1954 tcg_gen_add_i64(t0, reg, val);
35a1ec8e
PMD
1955 t2 = tcg_constant_i64(-1);
1956 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
24e82e68
RH
1957 }
1958 } else {
35a1ec8e 1959 TCGv_i64 t1 = tcg_temp_new_i64();
24e82e68
RH
1960 if (d) {
1961 /* Detect signed overflow for subtraction. */
1962 tcg_gen_xor_i64(t0, reg, val);
1963 tcg_gen_sub_i64(t1, reg, val);
7a31e0c6 1964 tcg_gen_xor_i64(reg, reg, t1);
24e82e68
RH
1965 tcg_gen_and_i64(t0, t0, reg);
1966
1967 /* Bound the result. */
1968 tcg_gen_movi_i64(reg, INT64_MIN);
35a1ec8e 1969 t2 = tcg_constant_i64(0);
24e82e68
RH
1970 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1971 } else {
1972 /* Detect signed overflow for addition. */
1973 tcg_gen_xor_i64(t0, reg, val);
1974 tcg_gen_add_i64(reg, reg, val);
1975 tcg_gen_xor_i64(t1, reg, val);
1976 tcg_gen_andc_i64(t0, t1, t0);
1977
1978 /* Bound the result. */
1979 tcg_gen_movi_i64(t1, INT64_MAX);
35a1ec8e 1980 t2 = tcg_constant_i64(0);
24e82e68
RH
1981 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1982 }
35a1ec8e 1983 tcg_temp_free_i64(t1);
24e82e68
RH
1984 }
1985 tcg_temp_free_i64(t0);
24e82e68
RH
1986}
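
/*
 * Illustrative sketch, not part of the translator: the XOR-based signed
 * overflow tests used by do_sat_addsub_64, written out in plain C.  Since
 * VAL is known positive, subtraction can only overflow towards INT64_MIN
 * and addition only towards INT64_MAX.  The function name is hypothetical.
 */
#include <stdint.h>
#include <stdbool.h>

static int64_t model_ssat_addsub_64(int64_t reg, int64_t val, bool d)
{
    uint64_t a = reg, b = val;

    if (d) {
        uint64_t r = a - b;
        /* Overflow iff the operands differ in sign and the result's sign
         * differs from the minuend's: (a ^ b) & (a ^ r) has bit 63 set. */
        return ((a ^ b) & (a ^ r)) >> 63 ? INT64_MIN : (int64_t)r;
    } else {
        uint64_t r = a + b;
        /* Overflow iff the operands share a sign and the result's sign
         * differs: ~(a ^ b) & (r ^ b) has bit 63 set. */
        return (~(a ^ b) & (r ^ b)) >> 63 ? INT64_MAX : (int64_t)r;
    }
}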
1987
1988/* Similarly with a vector and a scalar operand. */
1989static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1990 TCGv_i64 val, bool u, bool d)
1991{
1992 unsigned vsz = vec_full_reg_size(s);
1993 TCGv_ptr dptr, nptr;
1994 TCGv_i32 t32, desc;
1995 TCGv_i64 t64;
1996
1997 dptr = tcg_temp_new_ptr();
1998 nptr = tcg_temp_new_ptr();
1999 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
2000 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
2001 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2002
2003 switch (esz) {
2004 case MO_8:
2005 t32 = tcg_temp_new_i32();
2006 tcg_gen_extrl_i64_i32(t32, val);
2007 if (d) {
2008 tcg_gen_neg_i32(t32, t32);
2009 }
2010 if (u) {
2011 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
2012 } else {
2013 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
2014 }
2015 tcg_temp_free_i32(t32);
2016 break;
2017
2018 case MO_16:
2019 t32 = tcg_temp_new_i32();
2020 tcg_gen_extrl_i64_i32(t32, val);
2021 if (d) {
2022 tcg_gen_neg_i32(t32, t32);
2023 }
2024 if (u) {
2025 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
2026 } else {
2027 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
2028 }
2029 tcg_temp_free_i32(t32);
2030 break;
2031
2032 case MO_32:
2033 t64 = tcg_temp_new_i64();
2034 if (d) {
2035 tcg_gen_neg_i64(t64, val);
2036 } else {
2037 tcg_gen_mov_i64(t64, val);
2038 }
2039 if (u) {
2040 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
2041 } else {
2042 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
2043 }
2044 tcg_temp_free_i64(t64);
2045 break;
2046
2047 case MO_64:
2048 if (u) {
2049 if (d) {
2050 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
2051 } else {
2052 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
2053 }
2054 } else if (d) {
2055 t64 = tcg_temp_new_i64();
2056 tcg_gen_neg_i64(t64, val);
2057 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
2058 tcg_temp_free_i64(t64);
2059 } else {
2060 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
2061 }
2062 break;
2063
2064 default:
2065 g_assert_not_reached();
2066 }
2067
2068 tcg_temp_free_ptr(dptr);
2069 tcg_temp_free_ptr(nptr);
2070 tcg_temp_free_i32(desc);
2071}
2072
3a7be554 2073static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
2074{
2075 if (sve_access_check(s)) {
2076 unsigned fullsz = vec_full_reg_size(s);
2077 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2078 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2079 }
2080 return true;
2081}
2082
3a7be554 2083static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2084{
2085 if (sve_access_check(s)) {
2086 unsigned fullsz = vec_full_reg_size(s);
2087 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2088 int inc = numelem * a->imm * (a->d ? -1 : 1);
2089 TCGv_i64 reg = cpu_reg(s, a->rd);
2090
2091 tcg_gen_addi_i64(reg, reg, inc);
2092 }
2093 return true;
2094}
2095
3a7be554 2096static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2097{
2098 if (!sve_access_check(s)) {
2099 return true;
2100 }
2101
2102 unsigned fullsz = vec_full_reg_size(s);
2103 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2104 int inc = numelem * a->imm;
2105 TCGv_i64 reg = cpu_reg(s, a->rd);
2106
2107 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
2108 if (inc == 0) {
2109 if (a->u) {
2110 tcg_gen_ext32u_i64(reg, reg);
2111 } else {
2112 tcg_gen_ext32s_i64(reg, reg);
2113 }
2114 } else {
2115 TCGv_i64 t = tcg_const_i64(inc);
2116 do_sat_addsub_32(reg, t, a->u, a->d);
2117 tcg_temp_free_i64(t);
2118 }
2119 return true;
2120}
2121
3a7be554 2122static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2123{
2124 if (!sve_access_check(s)) {
2125 return true;
2126 }
2127
2128 unsigned fullsz = vec_full_reg_size(s);
2129 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2130 int inc = numelem * a->imm;
2131 TCGv_i64 reg = cpu_reg(s, a->rd);
2132
2133 if (inc != 0) {
2134 TCGv_i64 t = tcg_const_i64(inc);
2135 do_sat_addsub_64(reg, t, a->u, a->d);
2136 tcg_temp_free_i64(t);
2137 }
2138 return true;
2139}
2140
3a7be554 2141static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2142{
2143 if (a->esz == 0) {
2144 return false;
2145 }
2146
2147 unsigned fullsz = vec_full_reg_size(s);
2148 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2149 int inc = numelem * a->imm;
2150
2151 if (inc != 0) {
2152 if (sve_access_check(s)) {
2153 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
2154 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
2155 vec_full_reg_offset(s, a->rn),
2156 t, fullsz, fullsz);
2157 tcg_temp_free_i64(t);
2158 }
2159 } else {
2160 do_mov_z(s, a->rd, a->rn);
2161 }
2162 return true;
2163}
2164
3a7be554 2165static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2166{
2167 if (a->esz == 0) {
2168 return false;
2169 }
2170
2171 unsigned fullsz = vec_full_reg_size(s);
2172 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2173 int inc = numelem * a->imm;
2174
2175 if (inc != 0) {
2176 if (sve_access_check(s)) {
2177 TCGv_i64 t = tcg_const_i64(inc);
2178 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
2179 tcg_temp_free_i64(t);
2180 }
2181 } else {
2182 do_mov_z(s, a->rd, a->rn);
2183 }
2184 return true;
2185}
2186
e1fa1164
RH
2187/*
2188 *** SVE Bitwise Immediate Group
2189 */
2190
2191static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2192{
2193 uint64_t imm;
2194 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2195 extract32(a->dbm, 0, 6),
2196 extract32(a->dbm, 6, 6))) {
2197 return false;
2198 }
2199 if (sve_access_check(s)) {
2200 unsigned vsz = vec_full_reg_size(s);
2201 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
2202 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
2203 }
2204 return true;
2205}
2206
3a7be554 2207static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2208{
2209 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
2210}
2211
3a7be554 2212static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2213{
2214 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
2215}
2216
3a7be554 2217static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2218{
2219 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
2220}
2221
3a7be554 2222static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2223{
2224 uint64_t imm;
2225 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2226 extract32(a->dbm, 0, 6),
2227 extract32(a->dbm, 6, 6))) {
2228 return false;
2229 }
2230 if (sve_access_check(s)) {
2231 do_dupi_z(s, a->rd, imm);
2232 }
2233 return true;
2234}
2235
f25a2361
RH
2236/*
2237 *** SVE Integer Wide Immediate - Predicated Group
2238 */
2239
2240/* Implement all merging copies. This is used for CPY (immediate),
2241 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2242 */
2243static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2244 TCGv_i64 val)
2245{
2246 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2247 static gen_cpy * const fns[4] = {
2248 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2249 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2250 };
2251 unsigned vsz = vec_full_reg_size(s);
2252 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2253 TCGv_ptr t_zd = tcg_temp_new_ptr();
2254 TCGv_ptr t_zn = tcg_temp_new_ptr();
2255 TCGv_ptr t_pg = tcg_temp_new_ptr();
2256
2257 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2258 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2259 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2260
2261 fns[esz](t_zd, t_zn, t_pg, val, desc);
2262
2263 tcg_temp_free_ptr(t_zd);
2264 tcg_temp_free_ptr(t_zn);
2265 tcg_temp_free_ptr(t_pg);
2266 tcg_temp_free_i32(desc);
2267}
2268
3a7be554 2269static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361
RH
2270{
2271 if (a->esz == 0) {
2272 return false;
2273 }
2274 if (sve_access_check(s)) {
2275 /* Decode the VFP immediate. */
2276 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
2277 TCGv_i64 t_imm = tcg_const_i64(imm);
2278 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
2279 tcg_temp_free_i64(t_imm);
2280 }
2281 return true;
2282}
2283
3a7be554 2284static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 2285{
3a7be554 2286 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
2287 return false;
2288 }
2289 if (sve_access_check(s)) {
2290 TCGv_i64 t_imm = tcg_const_i64(a->imm);
2291 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
2292 tcg_temp_free_i64(t_imm);
2293 }
2294 return true;
2295}
2296
3a7be554 2297static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
f25a2361
RH
2298{
2299 static gen_helper_gvec_2i * const fns[4] = {
2300 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2301 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2302 };
2303
3a7be554 2304 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
2305 return false;
2306 }
2307 if (sve_access_check(s)) {
2308 unsigned vsz = vec_full_reg_size(s);
2309 TCGv_i64 t_imm = tcg_const_i64(a->imm);
2310 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2311 pred_full_reg_offset(s, a->pg),
2312 t_imm, vsz, vsz, 0, fns[a->esz]);
2313 tcg_temp_free_i64(t_imm);
2314 }
2315 return true;
2316}
2317
b94f8f60
RH
2318/*
2319 *** SVE Permute Extract Group
2320 */
2321
75114792 2322static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
b94f8f60
RH
2323{
2324 if (!sve_access_check(s)) {
2325 return true;
2326 }
2327
2328 unsigned vsz = vec_full_reg_size(s);
75114792 2329 unsigned n_ofs = imm >= vsz ? 0 : imm;
b94f8f60 2330 unsigned n_siz = vsz - n_ofs;
75114792
SL
2331 unsigned d = vec_full_reg_offset(s, rd);
2332 unsigned n = vec_full_reg_offset(s, rn);
2333 unsigned m = vec_full_reg_offset(s, rm);
b94f8f60
RH
2334
2335 /* Use host vector move insns if we have appropriate sizes
2336 * and no unfortunate overlap.
2337 */
2338 if (m != d
2339 && n_ofs == size_for_gvec(n_ofs)
2340 && n_siz == size_for_gvec(n_siz)
2341 && (d != n || n_siz <= n_ofs)) {
2342 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2343 if (n_ofs != 0) {
2344 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2345 }
2346 } else {
2347 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2348 }
2349 return true;
2350}
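
/*
 * Illustrative sketch, not part of the translator: a byte-level model of
 * what EXT produces, matching the two gvec moves in the fast path above
 * (bytes [n_ofs, vsz) of Zn followed by the first n_ofs bytes of Zm) and
 * the same clamp of the immediate to zero when it is >= the vector length.
 * The function name and the 256-byte scratch buffer are assumptions.
 */
#include <stdint.h>
#include <string.h>

static void model_ext(uint8_t *d, const uint8_t *n, const uint8_t *m,
                      unsigned vsz, unsigned imm)
{
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    uint8_t tmp[256];   /* SVE vectors are at most 256 bytes */

    /* Build into a scratch buffer so that d may alias n or m. */
    memcpy(tmp, n + n_ofs, n_siz);
    memcpy(tmp + n_siz, m, n_ofs);
    memcpy(d, tmp, vsz);
}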
2351
75114792
SL
2352static bool trans_EXT(DisasContext *s, arg_EXT *a)
2353{
2354 return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
2355}
2356
2357static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
2358{
2359 if (!dc_isar_feature(aa64_sve2, s)) {
2360 return false;
2361 }
2362 return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
2363}
2364
30562ab7
RH
2365/*
2366 *** SVE Permute - Unpredicated Group
2367 */
2368
3a7be554 2369static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2370{
2371 if (sve_access_check(s)) {
2372 unsigned vsz = vec_full_reg_size(s);
2373 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2374 vsz, vsz, cpu_reg_sp(s, a->rn));
2375 }
2376 return true;
2377}
2378
3a7be554 2379static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
30562ab7
RH
2380{
2381 if ((a->imm & 0x1f) == 0) {
2382 return false;
2383 }
2384 if (sve_access_check(s)) {
2385 unsigned vsz = vec_full_reg_size(s);
2386 unsigned dofs = vec_full_reg_offset(s, a->rd);
2387 unsigned esz, index;
2388
2389 esz = ctz32(a->imm);
2390 index = a->imm >> (esz + 1);
2391
2392 if ((index << esz) < vsz) {
2393 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2394 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2395 } else {
7e17d50e
RH
2396 /*
2397 * While dup_mem handles 128-bit elements, dup_imm does not.
2398 * Thankfully element size doesn't matter for splatting zero.
2399 */
2400 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
30562ab7
RH
2401 }
2402 }
2403 return true;
2404}
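
/*
 * Illustrative sketch, not part of the translator: how the DUP (element)
 * immediate is split above.  The lowest set bit of imm encodes the element
 * size, the bits above it the element index; e.g. imm == 0x12 (0b10010)
 * gives esz == 1 (16-bit elements) and index == 4.  __builtin_ctz is a
 * GCC/Clang builtin and the function name is hypothetical.
 */
static void model_dup_x_decode(unsigned imm, unsigned *esz, unsigned *index)
{
    *esz = __builtin_ctz(imm);       /* the caller guarantees imm & 0x1f != 0 */
    *index = imm >> (*esz + 1);
}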
2405
2406static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2407{
2408 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2409 static gen_insr * const fns[4] = {
2410 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2411 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2412 };
2413 unsigned vsz = vec_full_reg_size(s);
2414 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2415 TCGv_ptr t_zd = tcg_temp_new_ptr();
2416 TCGv_ptr t_zn = tcg_temp_new_ptr();
2417
2418 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2419 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2420
2421 fns[a->esz](t_zd, t_zn, val, desc);
2422
2423 tcg_temp_free_ptr(t_zd);
2424 tcg_temp_free_ptr(t_zn);
2425 tcg_temp_free_i32(desc);
2426}
2427
3a7be554 2428static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2429{
2430 if (sve_access_check(s)) {
2431 TCGv_i64 t = tcg_temp_new_i64();
2432 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2433 do_insr_i64(s, a, t);
2434 tcg_temp_free_i64(t);
2435 }
2436 return true;
2437}
2438
3a7be554 2439static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2440{
2441 if (sve_access_check(s)) {
2442 do_insr_i64(s, a, cpu_reg(s, a->rm));
2443 }
2444 return true;
2445}
2446
3a7be554 2447static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
30562ab7
RH
2448{
2449 static gen_helper_gvec_2 * const fns[4] = {
2450 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2451 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2452 };
2453
2454 if (sve_access_check(s)) {
40e32e5a 2455 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
30562ab7
RH
2456 }
2457 return true;
2458}
2459
3a7be554 2460static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2461{
2462 static gen_helper_gvec_3 * const fns[4] = {
2463 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2464 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2465 };
2466
2467 if (sve_access_check(s)) {
e645d1a1 2468 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
30562ab7
RH
2469 }
2470 return true;
2471}
2472
80a712a2
SL
2473static bool trans_TBL_sve2(DisasContext *s, arg_rrr_esz *a)
2474{
2475 static gen_helper_gvec_4 * const fns[4] = {
2476 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2477 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2478 };
2479
2480 if (!dc_isar_feature(aa64_sve2, s)) {
2481 return false;
2482 }
2483 if (sve_access_check(s)) {
2484 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn,
2485 (a->rn + 1) % 32, a->rm, 0);
2486 }
2487 return true;
2488}
2489
2490static bool trans_TBX(DisasContext *s, arg_rrr_esz *a)
2491{
2492 static gen_helper_gvec_3 * const fns[4] = {
2493 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2494 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2495 };
2496
2497 if (!dc_isar_feature(aa64_sve2, s)) {
2498 return false;
2499 }
2500 if (sve_access_check(s)) {
2501 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
2502 }
2503 return true;
2504}
2505
3a7be554 2506static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2507{
2508 static gen_helper_gvec_2 * const fns[4][2] = {
2509 { NULL, NULL },
2510 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2511 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2512 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2513 };
2514
2515 if (a->esz == 0) {
2516 return false;
2517 }
2518 if (sve_access_check(s)) {
2519 unsigned vsz = vec_full_reg_size(s);
2520 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2521 vec_full_reg_offset(s, a->rn)
2522 + (a->h ? vsz / 2 : 0),
2523 vsz, vsz, 0, fns[a->esz][a->u]);
2524 }
2525 return true;
2526}
2527
d731d8cb
RH
2528/*
2529 *** SVE Permute - Predicates Group
2530 */
2531
2532static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2533 gen_helper_gvec_3 *fn)
2534{
2535 if (!sve_access_check(s)) {
2536 return true;
2537 }
2538
2539 unsigned vsz = pred_full_reg_size(s);
2540
d731d8cb
RH
2541 TCGv_ptr t_d = tcg_temp_new_ptr();
2542 TCGv_ptr t_n = tcg_temp_new_ptr();
2543 TCGv_ptr t_m = tcg_temp_new_ptr();
2544 TCGv_i32 t_desc;
f9b0fcce 2545 uint32_t desc = 0;
d731d8cb 2546
f9b0fcce
RH
2547 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2548 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2549 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2550
2551 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2552 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2553 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2554 t_desc = tcg_const_i32(desc);
2555
2556 fn(t_d, t_n, t_m, t_desc);
2557
2558 tcg_temp_free_ptr(t_d);
2559 tcg_temp_free_ptr(t_n);
2560 tcg_temp_free_ptr(t_m);
2561 tcg_temp_free_i32(t_desc);
2562 return true;
2563}
2564
2565static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2566 gen_helper_gvec_2 *fn)
2567{
2568 if (!sve_access_check(s)) {
2569 return true;
2570 }
2571
2572 unsigned vsz = pred_full_reg_size(s);
2573 TCGv_ptr t_d = tcg_temp_new_ptr();
2574 TCGv_ptr t_n = tcg_temp_new_ptr();
2575 TCGv_i32 t_desc;
70acaafe 2576 uint32_t desc = 0;
d731d8cb
RH
2577
2578 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2579 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2580
70acaafe
RH
2581 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2582 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2583 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2584 t_desc = tcg_const_i32(desc);
2585
2586 fn(t_d, t_n, t_desc);
2587
2588 tcg_temp_free_i32(t_desc);
2589 tcg_temp_free_ptr(t_d);
2590 tcg_temp_free_ptr(t_n);
2591 return true;
2592}
2593
3a7be554 2594static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2595{
2596 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2597}
2598
3a7be554 2599static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2600{
2601 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2602}
2603
3a7be554 2604static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2605{
2606 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2607}
2608
3a7be554 2609static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2610{
2611 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2612}
2613
3a7be554 2614static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2615{
2616 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2617}
2618
3a7be554 2619static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2620{
2621 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2622}
2623
3a7be554 2624static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
d731d8cb
RH
2625{
2626 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2627}
2628
3a7be554 2629static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
d731d8cb
RH
2630{
2631 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2632}
2633
3a7be554 2634static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
d731d8cb
RH
2635{
2636 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2637}
2638
234b48e9
RH
2639/*
2640 *** SVE Permute - Interleaving Group
2641 */
2642
2643static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2644{
2645 static gen_helper_gvec_3 * const fns[4] = {
2646 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2647 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2648 };
2649
2650 if (sve_access_check(s)) {
2651 unsigned vsz = vec_full_reg_size(s);
2652 unsigned high_ofs = high ? vsz / 2 : 0;
2653 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2654 vec_full_reg_offset(s, a->rn) + high_ofs,
2655 vec_full_reg_offset(s, a->rm) + high_ofs,
2656 vsz, vsz, 0, fns[a->esz]);
2657 }
2658 return true;
2659}
2660
2661static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2662 gen_helper_gvec_3 *fn)
2663{
2664 if (sve_access_check(s)) {
e645d1a1 2665 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
234b48e9
RH
2666 }
2667 return true;
2668}
2669
3a7be554 2670static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2671{
2672 return do_zip(s, a, false);
2673}
2674
3a7be554 2675static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2676{
2677 return do_zip(s, a, true);
2678}
2679
74b64b25
RH
2680static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
2681{
2682 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2683 return false;
2684 }
2685 if (sve_access_check(s)) {
2686 unsigned vsz = vec_full_reg_size(s);
2687 unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
2688 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2689 vec_full_reg_offset(s, a->rn) + high_ofs,
2690 vec_full_reg_offset(s, a->rm) + high_ofs,
2691 vsz, vsz, 0, gen_helper_sve2_zip_q);
2692 }
2693 return true;
2694}
2695
2696static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
2697{
2698 return do_zip_q(s, a, false);
2699}
2700
2701static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
2702{
2703 return do_zip_q(s, a, true);
2704}
2705
234b48e9
RH
2706static gen_helper_gvec_3 * const uzp_fns[4] = {
2707 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2708 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2709};
2710
3a7be554 2711static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2712{
2713 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2714}
2715
3a7be554 2716static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2717{
2718 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2719}
2720
74b64b25
RH
2721static bool trans_UZP1_q(DisasContext *s, arg_rrr_esz *a)
2722{
2723 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2724 return false;
2725 }
2726 return do_zzz_data_ool(s, a, 0, gen_helper_sve2_uzp_q);
2727}
2728
2729static bool trans_UZP2_q(DisasContext *s, arg_rrr_esz *a)
2730{
2731 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2732 return false;
2733 }
2734 return do_zzz_data_ool(s, a, 16, gen_helper_sve2_uzp_q);
2735}
2736
234b48e9
RH
2737static gen_helper_gvec_3 * const trn_fns[4] = {
2738 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2739 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2740};
2741
3a7be554 2742static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2743{
2744 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2745}
2746
3a7be554 2747static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2748{
2749 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2750}
2751
74b64b25
RH
2752static bool trans_TRN1_q(DisasContext *s, arg_rrr_esz *a)
2753{
2754 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2755 return false;
2756 }
2757 return do_zzz_data_ool(s, a, 0, gen_helper_sve2_trn_q);
2758}
2759
2760static bool trans_TRN2_q(DisasContext *s, arg_rrr_esz *a)
2761{
2762 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2763 return false;
2764 }
2765 return do_zzz_data_ool(s, a, 16, gen_helper_sve2_trn_q);
2766}
2767
3ca879ae
RH
2768/*
2769 *** SVE Permute Vector - Predicated Group
2770 */
2771
3a7be554 2772static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
3ca879ae
RH
2773{
2774 static gen_helper_gvec_3 * const fns[4] = {
2775 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2776 };
2777 return do_zpz_ool(s, a, fns[a->esz]);
2778}
2779
ef23cb72
RH
2780/* Call the helper that computes the ARM LastActiveElement pseudocode
2781 * function, scaled by the element size. This includes the not found
2782 * indication; e.g. not found for esz=3 is -8.
2783 */
2784static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2785{
2786 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2787 * round up, as we do elsewhere, because we need the exact size.
2788 */
2789 TCGv_ptr t_p = tcg_temp_new_ptr();
2790 TCGv_i32 t_desc;
2acbfbe4 2791 unsigned desc = 0;
ef23cb72 2792
2acbfbe4
RH
2793 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2794 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
ef23cb72
RH
2795
2796 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2797 t_desc = tcg_const_i32(desc);
2798
2799 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2800
2801 tcg_temp_free_i32(t_desc);
2802 tcg_temp_free_ptr(t_p);
2803}
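
/*
 * Illustrative sketch, not part of the translator: a plain-C model of the
 * value the helper returns, i.e. the byte offset of the last active element
 * (only predicate bits on element boundaries count), or -(1 << esz) when no
 * element is active, matching the "not found for esz=3 is -8" note above.
 * The function name and taking the predicate as a byte array are assumptions.
 */
#include <stdint.h>

static int model_last_active_element(const uint8_t *pred, unsigned pred_bytes,
                                     int esz)
{
    for (int i = pred_bytes * 8 - (1 << esz); i >= 0; i -= 1 << esz) {
        if (pred[i / 8] & (1 << (i % 8))) {
            return i;
        }
    }
    return -(1 << esz);
}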
2804
2805/* Increment LAST to the offset of the next element in the vector,
2806 * wrapping around to 0.
2807 */
2808static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2809{
2810 unsigned vsz = vec_full_reg_size(s);
2811
2812 tcg_gen_addi_i32(last, last, 1 << esz);
2813 if (is_power_of_2(vsz)) {
2814 tcg_gen_andi_i32(last, last, vsz - 1);
2815 } else {
2816 TCGv_i32 max = tcg_const_i32(vsz);
2817 TCGv_i32 zero = tcg_const_i32(0);
2818 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2819 tcg_temp_free_i32(max);
2820 tcg_temp_free_i32(zero);
2821 }
2822}
2823
2824/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2825static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2826{
2827 unsigned vsz = vec_full_reg_size(s);
2828
2829 if (is_power_of_2(vsz)) {
2830 tcg_gen_andi_i32(last, last, vsz - 1);
2831 } else {
2832 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2833 TCGv_i32 zero = tcg_const_i32(0);
2834 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2835 tcg_temp_free_i32(max);
2836 tcg_temp_free_i32(zero);
2837 }
2838}
2839
2840/* Load an unsigned element of ESZ from BASE+OFS. */
2841static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2842{
2843 TCGv_i64 r = tcg_temp_new_i64();
2844
2845 switch (esz) {
2846 case 0:
2847 tcg_gen_ld8u_i64(r, base, ofs);
2848 break;
2849 case 1:
2850 tcg_gen_ld16u_i64(r, base, ofs);
2851 break;
2852 case 2:
2853 tcg_gen_ld32u_i64(r, base, ofs);
2854 break;
2855 case 3:
2856 tcg_gen_ld_i64(r, base, ofs);
2857 break;
2858 default:
2859 g_assert_not_reached();
2860 }
2861 return r;
2862}
2863
2864/* Load an unsigned element of ESZ from RM[LAST]. */
2865static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2866 int rm, int esz)
2867{
2868 TCGv_ptr p = tcg_temp_new_ptr();
2869 TCGv_i64 r;
2870
 2871 /* Convert the offset within the vector into an offset into ENV.
2872 * The final adjustment for the vector register base
2873 * is added via constant offset to the load.
2874 */
2875#ifdef HOST_WORDS_BIGENDIAN
2876 /* Adjust for element ordering. See vec_reg_offset. */
2877 if (esz < 3) {
2878 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2879 }
2880#endif
2881 tcg_gen_ext_i32_ptr(p, last);
2882 tcg_gen_add_ptr(p, p, cpu_env);
2883
2884 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2885 tcg_temp_free_ptr(p);
2886
2887 return r;
2888}
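
/*
 * Illustrative sketch, not part of the translator: the big-endian fixup
 * applied above.  Within each 64-bit chunk of a z-register, sub-64-bit
 * elements sit at mirrored byte offsets on a big-endian host, so the
 * in-vector byte offset is flipped within its 8-byte group; e.g. the
 * 16-bit element at vector offset 2 lives at host offset 4 (2 ^ 6).
 * The function name is hypothetical.
 */
static unsigned model_host_elt_ofs(unsigned vec_ofs, int esz, int big_endian)
{
    if (big_endian && esz < 3) {
        vec_ofs ^= 8 - (1u << esz);
    }
    return vec_ofs;
}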
2889
2890/* Compute CLAST for a Zreg. */
2891static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2892{
2893 TCGv_i32 last;
2894 TCGLabel *over;
2895 TCGv_i64 ele;
2896 unsigned vsz, esz = a->esz;
2897
2898 if (!sve_access_check(s)) {
2899 return true;
2900 }
2901
2902 last = tcg_temp_local_new_i32();
2903 over = gen_new_label();
2904
2905 find_last_active(s, last, esz, a->pg);
2906
2907 /* There is of course no movcond for a 2048-bit vector,
2908 * so we must branch over the actual store.
2909 */
2910 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2911
2912 if (!before) {
2913 incr_last_active(s, last, esz);
2914 }
2915
2916 ele = load_last_active(s, last, a->rm, esz);
2917 tcg_temp_free_i32(last);
2918
2919 vsz = vec_full_reg_size(s);
2920 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2921 tcg_temp_free_i64(ele);
2922
2923 /* If this insn used MOVPRFX, we may need a second move. */
2924 if (a->rd != a->rn) {
2925 TCGLabel *done = gen_new_label();
2926 tcg_gen_br(done);
2927
2928 gen_set_label(over);
2929 do_mov_z(s, a->rd, a->rn);
2930
2931 gen_set_label(done);
2932 } else {
2933 gen_set_label(over);
2934 }
2935 return true;
2936}
2937
3a7be554 2938static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2939{
2940 return do_clast_vector(s, a, false);
2941}
2942
3a7be554 2943static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2944{
2945 return do_clast_vector(s, a, true);
2946}
2947
2948/* Compute CLAST for a scalar. */
2949static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2950 bool before, TCGv_i64 reg_val)
2951{
2952 TCGv_i32 last = tcg_temp_new_i32();
2953 TCGv_i64 ele, cmp, zero;
2954
2955 find_last_active(s, last, esz, pg);
2956
2957 /* Extend the original value of last prior to incrementing. */
2958 cmp = tcg_temp_new_i64();
2959 tcg_gen_ext_i32_i64(cmp, last);
2960
2961 if (!before) {
2962 incr_last_active(s, last, esz);
2963 }
2964
2965 /* The conceit here is that while last < 0 indicates not found, after
2966 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2967 * from which we can load garbage. We then discard the garbage with
2968 * a conditional move.
2969 */
2970 ele = load_last_active(s, last, rm, esz);
2971 tcg_temp_free_i32(last);
2972
2973 zero = tcg_const_i64(0);
2974 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2975
2976 tcg_temp_free_i64(zero);
2977 tcg_temp_free_i64(cmp);
2978 tcg_temp_free_i64(ele);
2979}
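
/*
 * Illustrative sketch, not part of the translator: what the conditional
 * move above computes for the scalar forms.  LAST_OFS is the byte offset
 * returned by the LastActiveElement helper; a negative value means no
 * element was active and the old scalar is kept.  A little-endian host and
 * the function name are assumptions of this sketch.
 */
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

static uint64_t model_clast_scalar(const uint8_t *zm, unsigned vsz,
                                   int last_ofs, int esz, bool before,
                                   uint64_t old_val)
{
    if (last_ofs < 0) {
        return old_val;                             /* no active element */
    }
    if (!before) {
        last_ofs = (last_ofs + (1 << esz)) % vsz;   /* CLASTA: next element */
    }
    uint64_t elt = 0;
    memcpy(&elt, zm + last_ofs, 1 << esz);          /* zero-extended load */
    return elt;
}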
2980
2981/* Compute CLAST for a Vreg. */
2982static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2983{
2984 if (sve_access_check(s)) {
2985 int esz = a->esz;
2986 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2987 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2988
2989 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2990 write_fp_dreg(s, a->rd, reg);
2991 tcg_temp_free_i64(reg);
2992 }
2993 return true;
2994}
2995
3a7be554 2996static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2997{
2998 return do_clast_fp(s, a, false);
2999}
3000
3a7be554 3001static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3002{
3003 return do_clast_fp(s, a, true);
3004}
3005
3006/* Compute CLAST for an Xreg. */
3007static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
3008{
3009 TCGv_i64 reg;
3010
3011 if (!sve_access_check(s)) {
3012 return true;
3013 }
3014
3015 reg = cpu_reg(s, a->rd);
3016 switch (a->esz) {
3017 case 0:
3018 tcg_gen_ext8u_i64(reg, reg);
3019 break;
3020 case 1:
3021 tcg_gen_ext16u_i64(reg, reg);
3022 break;
3023 case 2:
3024 tcg_gen_ext32u_i64(reg, reg);
3025 break;
3026 case 3:
3027 break;
3028 default:
3029 g_assert_not_reached();
3030 }
3031
3032 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
3033 return true;
3034}
3035
3a7be554 3036static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3037{
3038 return do_clast_general(s, a, false);
3039}
3040
3a7be554 3041static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3042{
3043 return do_clast_general(s, a, true);
3044}
3045
3046/* Compute LAST for a scalar. */
3047static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
3048 int pg, int rm, bool before)
3049{
3050 TCGv_i32 last = tcg_temp_new_i32();
3051 TCGv_i64 ret;
3052
3053 find_last_active(s, last, esz, pg);
3054 if (before) {
3055 wrap_last_active(s, last, esz);
3056 } else {
3057 incr_last_active(s, last, esz);
3058 }
3059
3060 ret = load_last_active(s, last, rm, esz);
3061 tcg_temp_free_i32(last);
3062 return ret;
3063}
3064
3065/* Compute LAST for a Vreg. */
3066static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
3067{
3068 if (sve_access_check(s)) {
3069 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
3070 write_fp_dreg(s, a->rd, val);
3071 tcg_temp_free_i64(val);
3072 }
3073 return true;
3074}
3075
3a7be554 3076static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3077{
3078 return do_last_fp(s, a, false);
3079}
3080
3a7be554 3081static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3082{
3083 return do_last_fp(s, a, true);
3084}
3085
3086/* Compute LAST for an Xreg. */
3087static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
3088{
3089 if (sve_access_check(s)) {
3090 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
3091 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
3092 tcg_temp_free_i64(val);
3093 }
3094 return true;
3095}
3096
3a7be554 3097static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3098{
3099 return do_last_general(s, a, false);
3100}
3101
3a7be554 3102static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
3103{
3104 return do_last_general(s, a, true);
3105}
3106
3a7be554 3107static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
3108{
3109 if (sve_access_check(s)) {
3110 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
3111 }
3112 return true;
3113}
3114
3a7be554 3115static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
3116{
3117 if (sve_access_check(s)) {
3118 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
3119 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
3120 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
3121 tcg_temp_free_i64(t);
3122 }
3123 return true;
3124}
3125
3a7be554 3126static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3127{
3128 static gen_helper_gvec_3 * const fns[4] = {
3129 NULL,
3130 gen_helper_sve_revb_h,
3131 gen_helper_sve_revb_s,
3132 gen_helper_sve_revb_d,
3133 };
3134 return do_zpz_ool(s, a, fns[a->esz]);
3135}
3136
3a7be554 3137static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3138{
3139 static gen_helper_gvec_3 * const fns[4] = {
3140 NULL,
3141 NULL,
3142 gen_helper_sve_revh_s,
3143 gen_helper_sve_revh_d,
3144 };
3145 return do_zpz_ool(s, a, fns[a->esz]);
3146}
3147
3a7be554 3148static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3149{
3150 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
3151}
3152
3a7be554 3153static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
3154{
3155 static gen_helper_gvec_3 * const fns[4] = {
3156 gen_helper_sve_rbit_b,
3157 gen_helper_sve_rbit_h,
3158 gen_helper_sve_rbit_s,
3159 gen_helper_sve_rbit_d,
3160 };
3161 return do_zpz_ool(s, a, fns[a->esz]);
3162}
3163
3a7be554 3164static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
b48ff240
RH
3165{
3166 if (sve_access_check(s)) {
36cbb7a8 3167 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
dd701faf 3168 a->rd, a->rn, a->rm, a->pg, a->esz);
b48ff240
RH
3169 }
3170 return true;
3171}
3172
75114792
SL
3173static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
3174{
3175 if (!dc_isar_feature(aa64_sve2, s)) {
3176 return false;
3177 }
3178 if (sve_access_check(s)) {
3179 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
3180 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
3181 }
3182 return true;
3183}
3184
757f9cff
RH
3185/*
3186 *** SVE Integer Compare - Vectors Group
3187 */
3188
3189static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
3190 gen_helper_gvec_flags_4 *gen_fn)
3191{
3192 TCGv_ptr pd, zn, zm, pg;
3193 unsigned vsz;
3194 TCGv_i32 t;
3195
3196 if (gen_fn == NULL) {
3197 return false;
3198 }
3199 if (!sve_access_check(s)) {
3200 return true;
3201 }
3202
3203 vsz = vec_full_reg_size(s);
3204 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
3205 pd = tcg_temp_new_ptr();
3206 zn = tcg_temp_new_ptr();
3207 zm = tcg_temp_new_ptr();
3208 pg = tcg_temp_new_ptr();
3209
3210 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
3211 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
3212 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
3213 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3214
3215 gen_fn(t, pd, zn, zm, pg, t);
3216
3217 tcg_temp_free_ptr(pd);
3218 tcg_temp_free_ptr(zn);
3219 tcg_temp_free_ptr(zm);
3220 tcg_temp_free_ptr(pg);
3221
3222 do_pred_flags(t);
3223
3224 tcg_temp_free_i32(t);
3225 return true;
3226}
3227
3228#define DO_PPZZ(NAME, name) \
3a7be554 3229static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
3230{ \
3231 static gen_helper_gvec_flags_4 * const fns[4] = { \
3232 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
3233 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
3234 }; \
3235 return do_ppzz_flags(s, a, fns[a->esz]); \
3236}
3237
3238DO_PPZZ(CMPEQ, cmpeq)
3239DO_PPZZ(CMPNE, cmpne)
3240DO_PPZZ(CMPGT, cmpgt)
3241DO_PPZZ(CMPGE, cmpge)
3242DO_PPZZ(CMPHI, cmphi)
3243DO_PPZZ(CMPHS, cmphs)
3244
3245#undef DO_PPZZ
3246
3247#define DO_PPZW(NAME, name) \
3a7be554 3248static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
3249{ \
3250 static gen_helper_gvec_flags_4 * const fns[4] = { \
3251 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
3252 gen_helper_sve_##name##_ppzw_s, NULL \
3253 }; \
3254 return do_ppzz_flags(s, a, fns[a->esz]); \
3255}
3256
3257DO_PPZW(CMPEQ, cmpeq)
3258DO_PPZW(CMPNE, cmpne)
3259DO_PPZW(CMPGT, cmpgt)
3260DO_PPZW(CMPGE, cmpge)
3261DO_PPZW(CMPHI, cmphi)
3262DO_PPZW(CMPHS, cmphs)
3263DO_PPZW(CMPLT, cmplt)
3264DO_PPZW(CMPLE, cmple)
3265DO_PPZW(CMPLO, cmplo)
3266DO_PPZW(CMPLS, cmpls)
3267
3268#undef DO_PPZW
3269
38cadeba
RH
3270/*
3271 *** SVE Integer Compare - Immediate Groups
3272 */
3273
3274static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
3275 gen_helper_gvec_flags_3 *gen_fn)
3276{
3277 TCGv_ptr pd, zn, pg;
3278 unsigned vsz;
3279 TCGv_i32 t;
3280
3281 if (gen_fn == NULL) {
3282 return false;
3283 }
3284 if (!sve_access_check(s)) {
3285 return true;
3286 }
3287
3288 vsz = vec_full_reg_size(s);
3289 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
3290 pd = tcg_temp_new_ptr();
3291 zn = tcg_temp_new_ptr();
3292 pg = tcg_temp_new_ptr();
3293
3294 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
3295 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
3296 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3297
3298 gen_fn(t, pd, zn, pg, t);
3299
3300 tcg_temp_free_ptr(pd);
3301 tcg_temp_free_ptr(zn);
3302 tcg_temp_free_ptr(pg);
3303
3304 do_pred_flags(t);
3305
3306 tcg_temp_free_i32(t);
3307 return true;
3308}
3309
3310#define DO_PPZI(NAME, name) \
3a7be554 3311static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
38cadeba
RH
3312{ \
3313 static gen_helper_gvec_flags_3 * const fns[4] = { \
3314 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
3315 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
3316 }; \
3317 return do_ppzi_flags(s, a, fns[a->esz]); \
3318}
3319
3320DO_PPZI(CMPEQ, cmpeq)
3321DO_PPZI(CMPNE, cmpne)
3322DO_PPZI(CMPGT, cmpgt)
3323DO_PPZI(CMPGE, cmpge)
3324DO_PPZI(CMPHI, cmphi)
3325DO_PPZI(CMPHS, cmphs)
3326DO_PPZI(CMPLT, cmplt)
3327DO_PPZI(CMPLE, cmple)
3328DO_PPZI(CMPLO, cmplo)
3329DO_PPZI(CMPLS, cmpls)
3330
3331#undef DO_PPZI
3332
35da316f
RH
3333/*
3334 *** SVE Partition Break Group
3335 */
3336
3337static bool do_brk3(DisasContext *s, arg_rprr_s *a,
3338 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
3339{
3340 if (!sve_access_check(s)) {
3341 return true;
3342 }
3343
3344 unsigned vsz = pred_full_reg_size(s);
3345
3346 /* Predicate sizes may be smaller and cannot use simd_desc. */
3347 TCGv_ptr d = tcg_temp_new_ptr();
3348 TCGv_ptr n = tcg_temp_new_ptr();
3349 TCGv_ptr m = tcg_temp_new_ptr();
3350 TCGv_ptr g = tcg_temp_new_ptr();
04c774a2 3351 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3352
3353 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3354 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3355 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
3356 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3357
3358 if (a->s) {
3359 fn_s(t, d, n, m, g, t);
3360 do_pred_flags(t);
3361 } else {
3362 fn(d, n, m, g, t);
3363 }
3364 tcg_temp_free_ptr(d);
3365 tcg_temp_free_ptr(n);
3366 tcg_temp_free_ptr(m);
3367 tcg_temp_free_ptr(g);
3368 tcg_temp_free_i32(t);
3369 return true;
3370}
3371
3372static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3373 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3374{
3375 if (!sve_access_check(s)) {
3376 return true;
3377 }
3378
3379 unsigned vsz = pred_full_reg_size(s);
3380
3381 /* Predicate sizes may be smaller and cannot use simd_desc. */
3382 TCGv_ptr d = tcg_temp_new_ptr();
3383 TCGv_ptr n = tcg_temp_new_ptr();
3384 TCGv_ptr g = tcg_temp_new_ptr();
04c774a2 3385 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3386
3387 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3388 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3389 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3390
3391 if (a->s) {
3392 fn_s(t, d, n, g, t);
3393 do_pred_flags(t);
3394 } else {
3395 fn(d, n, g, t);
3396 }
3397 tcg_temp_free_ptr(d);
3398 tcg_temp_free_ptr(n);
3399 tcg_temp_free_ptr(g);
3400 tcg_temp_free_i32(t);
3401 return true;
3402}
3403
3a7be554 3404static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
35da316f
RH
3405{
3406 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
3407}
3408
3a7be554 3409static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
35da316f
RH
3410{
3411 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
3412}
3413
3a7be554 3414static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3415{
3416 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
3417}
3418
3a7be554 3419static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3420{
3421 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
3422}
3423
3a7be554 3424static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3425{
3426 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3427}
3428
3a7be554 3429static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3430{
3431 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3432}
3433
3a7be554 3434static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3435{
3436 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3437}
3438
9ee3a611
RH
3439/*
3440 *** SVE Predicate Count Group
3441 */
3442
3443static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3444{
3445 unsigned psz = pred_full_reg_size(s);
3446
3447 if (psz <= 8) {
3448 uint64_t psz_mask;
3449
3450 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3451 if (pn != pg) {
3452 TCGv_i64 g = tcg_temp_new_i64();
3453 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3454 tcg_gen_and_i64(val, val, g);
3455 tcg_temp_free_i64(g);
3456 }
3457
 3458 /* Narrow the pred_esz_masks value to the predicate size, simply to
 3459 * reduce the size of the code generated here.
3460 */
3461 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3462 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3463
3464 tcg_gen_ctpop_i64(val, val);
3465 } else {
3466 TCGv_ptr t_pn = tcg_temp_new_ptr();
3467 TCGv_ptr t_pg = tcg_temp_new_ptr();
f556a201 3468 unsigned desc = 0;
9ee3a611
RH
3469 TCGv_i32 t_desc;
3470
f556a201
RH
3471 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
3472 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
9ee3a611
RH
3473
3474 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3475 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3476 t_desc = tcg_const_i32(desc);
3477
3478 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3479 tcg_temp_free_ptr(t_pn);
3480 tcg_temp_free_ptr(t_pg);
3481 tcg_temp_free_i32(t_desc);
3482 }
3483}
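
/*
 * Illustrative sketch, not part of the translator: the psz <= 8 fast path
 * above, in plain C.  AND the two predicates, keep only bits that can begin
 * an element of the given size, truncate to the predicate size, and count.
 * The mask table mirrors pred_esz_masks; __builtin_popcountll is a
 * GCC/Clang builtin and the function name is hypothetical.
 */
#include <stdint.h>

static uint64_t model_cntp_small(uint64_t pn, uint64_t pg, unsigned psz, int esz)
{
    static const uint64_t esz_masks[4] = {
        0xffffffffffffffffull, 0x5555555555555555ull,
        0x1111111111111111ull, 0x0101010101010101ull,
    };
    uint64_t psz_mask = psz >= 8 ? ~0ull : (1ull << (psz * 8)) - 1;

    return __builtin_popcountll(pn & pg & esz_masks[esz] & psz_mask);
}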
3484
3a7be554 3485static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
3486{
3487 if (sve_access_check(s)) {
3488 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3489 }
3490 return true;
3491}
3492
3a7be554 3493static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3494{
3495 if (sve_access_check(s)) {
3496 TCGv_i64 reg = cpu_reg(s, a->rd);
3497 TCGv_i64 val = tcg_temp_new_i64();
3498
3499 do_cntp(s, val, a->esz, a->pg, a->pg);
3500 if (a->d) {
3501 tcg_gen_sub_i64(reg, reg, val);
3502 } else {
3503 tcg_gen_add_i64(reg, reg, val);
3504 }
3505 tcg_temp_free_i64(val);
3506 }
3507 return true;
3508}
3509
3a7be554 3510static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3511{
3512 if (a->esz == 0) {
3513 return false;
3514 }
3515 if (sve_access_check(s)) {
3516 unsigned vsz = vec_full_reg_size(s);
3517 TCGv_i64 val = tcg_temp_new_i64();
3518 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3519
3520 do_cntp(s, val, a->esz, a->pg, a->pg);
3521 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3522 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3523 }
3524 return true;
3525}
3526
3a7be554 3527static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3528{
3529 if (sve_access_check(s)) {
3530 TCGv_i64 reg = cpu_reg(s, a->rd);
3531 TCGv_i64 val = tcg_temp_new_i64();
3532
3533 do_cntp(s, val, a->esz, a->pg, a->pg);
3534 do_sat_addsub_32(reg, val, a->u, a->d);
3535 }
3536 return true;
3537}
3538
3a7be554 3539static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3540{
3541 if (sve_access_check(s)) {
3542 TCGv_i64 reg = cpu_reg(s, a->rd);
3543 TCGv_i64 val = tcg_temp_new_i64();
3544
3545 do_cntp(s, val, a->esz, a->pg, a->pg);
3546 do_sat_addsub_64(reg, val, a->u, a->d);
3547 }
3548 return true;
3549}
3550
3a7be554 3551static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3552{
3553 if (a->esz == 0) {
3554 return false;
3555 }
3556 if (sve_access_check(s)) {
3557 TCGv_i64 val = tcg_temp_new_i64();
3558 do_cntp(s, val, a->esz, a->pg, a->pg);
3559 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3560 }
3561 return true;
3562}
3563
caf1cefc
RH
3564/*
3565 *** SVE Integer Compare Scalars Group
3566 */
3567
3a7be554 3568static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3569{
3570 if (!sve_access_check(s)) {
3571 return true;
3572 }
3573
3574 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3575 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3576 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3577 TCGv_i64 cmp = tcg_temp_new_i64();
3578
3579 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3580 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3581 tcg_temp_free_i64(cmp);
3582
3583 /* VF = !NF & !CF. */
3584 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3585 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3586
3587 /* Both NF and VF actually look at bit 31. */
3588 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3589 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3590 return true;
3591}
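
/*
 * Illustrative sketch, not part of the translator: the flag result CTERM
 * produces.  N is set to the termination test, C is left unchanged, and
 * V becomes !N && !C; Z is not written here.  The function name is
 * hypothetical and the booleans stand in for the bit-31 values used by the
 * TCG code above.
 */
#include <stdint.h>
#include <stdbool.h>

static void model_cterm_flags(uint64_t rn, uint64_t rm, bool ne, bool cf,
                              bool *nf, bool *vf)
{
    *nf = ne ? rn != rm : rn == rm;
    *vf = !*nf && !cf;
}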
3592
3a7be554 3593static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
caf1cefc 3594{
bbd0968c 3595 TCGv_i64 op0, op1, t0, t1, tmax;
caf1cefc
RH
3596 TCGv_i32 t2, t3;
3597 TCGv_ptr ptr;
e610906c
RH
3598 unsigned vsz = vec_full_reg_size(s);
3599 unsigned desc = 0;
caf1cefc 3600 TCGCond cond;
34688dbc
RH
3601 uint64_t maxval;
3602 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
3603 bool eq = a->eq == a->lt;
caf1cefc 3604
34688dbc
RH
3605 /* The greater-than conditions are all SVE2. */
3606 if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
3607 return false;
3608 }
bbd0968c
RH
3609 if (!sve_access_check(s)) {
3610 return true;
3611 }
3612
3613 op0 = read_cpu_reg(s, a->rn, 1);
3614 op1 = read_cpu_reg(s, a->rm, 1);
3615
caf1cefc
RH
3616 if (!a->sf) {
3617 if (a->u) {
3618 tcg_gen_ext32u_i64(op0, op0);
3619 tcg_gen_ext32u_i64(op1, op1);
3620 } else {
3621 tcg_gen_ext32s_i64(op0, op0);
3622 tcg_gen_ext32s_i64(op1, op1);
3623 }
3624 }
3625
3626 /* For the helper, compress the different conditions into a computation
 3627 * of the number of iterations for which the condition is true.
caf1cefc 3628 */
bbd0968c
RH
3629 t0 = tcg_temp_new_i64();
3630 t1 = tcg_temp_new_i64();
34688dbc
RH
3631
3632 if (a->lt) {
3633 tcg_gen_sub_i64(t0, op1, op0);
3634 if (a->u) {
3635 maxval = a->sf ? UINT64_MAX : UINT32_MAX;
3636 cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
3637 } else {
3638 maxval = a->sf ? INT64_MAX : INT32_MAX;
3639 cond = eq ? TCG_COND_LE : TCG_COND_LT;
3640 }
3641 } else {
3642 tcg_gen_sub_i64(t0, op0, op1);
3643 if (a->u) {
3644 maxval = 0;
3645 cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
3646 } else {
3647 maxval = a->sf ? INT64_MIN : INT32_MIN;
3648 cond = eq ? TCG_COND_GE : TCG_COND_GT;
3649 }
3650 }
caf1cefc 3651
bbd0968c 3652 tmax = tcg_const_i64(vsz >> a->esz);
34688dbc 3653 if (eq) {
caf1cefc
RH
3654 /* Equality means one more iteration. */
3655 tcg_gen_addi_i64(t0, t0, 1);
bbd0968c 3656
34688dbc
RH
3657 /*
 3658 * For the less-than while, if op1 is maxval (the only case in which
 3659 * the addition above could overflow), then we produce an all-true
3660 * predicate by setting the count to the vector length. This is
3661 * because the pseudocode is described as an increment + compare
3662 * loop, and the maximum integer would always compare true.
3663 * Similarly, the greater-than while has the same issue with the
3664 * minimum integer due to the decrement + compare loop.
bbd0968c 3665 */
34688dbc 3666 tcg_gen_movi_i64(t1, maxval);
bbd0968c 3667 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
caf1cefc
RH
3668 }
3669
bbd0968c
RH
3670 /* Bound to the maximum. */
3671 tcg_gen_umin_i64(t0, t0, tmax);
3672 tcg_temp_free_i64(tmax);
3673
3674 /* Set the count to zero if the condition is false. */
caf1cefc
RH
3675 tcg_gen_movi_i64(t1, 0);
3676 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
bbd0968c 3677 tcg_temp_free_i64(t1);
caf1cefc 3678
bbd0968c 3679 /* Since we're bounded, pass as a 32-bit type. */
caf1cefc
RH
3680 t2 = tcg_temp_new_i32();
3681 tcg_gen_extrl_i64_i32(t2, t0);
3682 tcg_temp_free_i64(t0);
bbd0968c
RH
3683
3684 /* Scale elements to bits. */
3685 tcg_gen_shli_i32(t2, t2, a->esz);
caf1cefc 3686
e610906c
RH
3687 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3688 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
caf1cefc
RH
3689 t3 = tcg_const_i32(desc);
3690
3691 ptr = tcg_temp_new_ptr();
3692 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3693
34688dbc
RH
3694 if (a->lt) {
3695 gen_helper_sve_whilel(t2, ptr, t2, t3);
3696 } else {
3697 gen_helper_sve_whileg(t2, ptr, t2, t3);
3698 }
caf1cefc
RH
3699 do_pred_flags(t2);
3700
3701 tcg_temp_free_ptr(ptr);
3702 tcg_temp_free_i32(t2);
3703 tcg_temp_free_i32(t3);
3704 return true;
3705}
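
/*
 * Illustrative sketch, not part of the translator: the element count handed
 * to the helper for the unsigned less-than forms (WHILELO/WHILELS), showing
 * the op1 == maxval special case described above.  Signed and greater-than
 * forms differ only in the comparison and bound.  The function name is
 * hypothetical.
 */
#include <stdint.h>
#include <stdbool.h>

static uint64_t model_whilelo_count(uint64_t op0, uint64_t op1,
                                    unsigned vsz, int esz, bool eq)
{
    uint64_t elems = vsz >> esz;

    if (!(eq ? op0 <= op1 : op0 < op1)) {
        return 0;                    /* condition false: empty predicate */
    }
    uint64_t n = op1 - op0 + (eq ? 1 : 0);
    if (eq && op1 == UINT64_MAX) {
        n = elems;                   /* increment would overflow: all true */
    }
    return n < elems ? n : elems;    /* bound to the vector length */
}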
3706
14f6dad1
RH
3707static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3708{
3709 TCGv_i64 op0, op1, diff, t1, tmax;
3710 TCGv_i32 t2, t3;
3711 TCGv_ptr ptr;
3712 unsigned vsz = vec_full_reg_size(s);
3713 unsigned desc = 0;
3714
3715 if (!dc_isar_feature(aa64_sve2, s)) {
3716 return false;
3717 }
3718 if (!sve_access_check(s)) {
3719 return true;
3720 }
3721
3722 op0 = read_cpu_reg(s, a->rn, 1);
3723 op1 = read_cpu_reg(s, a->rm, 1);
3724
3725 tmax = tcg_const_i64(vsz);
3726 diff = tcg_temp_new_i64();
3727
3728 if (a->rw) {
3729 /* WHILERW */
3730 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3731 t1 = tcg_temp_new_i64();
3732 tcg_gen_sub_i64(diff, op0, op1);
3733 tcg_gen_sub_i64(t1, op1, op0);
3734 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3735 tcg_temp_free_i64(t1);
3736 /* Round down to a multiple of ESIZE. */
3737 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3738 /* If op1 == op0, diff == 0, and the condition is always true. */
3739 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3740 } else {
3741 /* WHILEWR */
3742 tcg_gen_sub_i64(diff, op1, op0);
3743 /* Round down to a multiple of ESIZE. */
3744 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3745 /* If op0 >= op1, diff <= 0, the condition is always true. */
3746 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3747 }
3748
3749 /* Bound to the maximum. */
3750 tcg_gen_umin_i64(diff, diff, tmax);
3751 tcg_temp_free_i64(tmax);
3752
3753 /* Since we're bounded, pass as a 32-bit type. */
3754 t2 = tcg_temp_new_i32();
3755 tcg_gen_extrl_i64_i32(t2, diff);
3756 tcg_temp_free_i64(diff);
3757
3758 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3759 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3760 t3 = tcg_const_i32(desc);
3761
3762 ptr = tcg_temp_new_ptr();
3763 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3764
3765 gen_helper_sve_whilel(t2, ptr, t2, t3);
3766 do_pred_flags(t2);
3767
3768 tcg_temp_free_ptr(ptr);
3769 tcg_temp_free_i32(t2);
3770 tcg_temp_free_i32(t3);
3771 return true;
3772}
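
/*
 * Illustrative sketch, not part of the translator: the byte count computed
 * above for WHILEWR (rw == false) and WHILERW (rw == true) before it is
 * handed to the helper.  The always-true cases map to a full vector.  The
 * function name is hypothetical.
 */
#include <stdint.h>
#include <stdbool.h>

static uint64_t model_while_ptr_bytes(uint64_t op0, uint64_t op1,
                                      unsigned vsz, int esz, bool rw)
{
    uint64_t diff;

    if (rw) {
        /* WHILERW: |op1 - op0|, rounded down to a whole element. */
        diff = op0 >= op1 ? op0 - op1 : op1 - op0;
        diff &= -((uint64_t)1 << esz);
        if (op0 == op1) {
            diff = vsz;              /* op0 == op1: condition always true */
        }
    } else {
        /* WHILEWR: op1 - op0, rounded down to a whole element. */
        diff = op1 - op0;
        diff &= -((uint64_t)1 << esz);
        if (op0 >= op1) {
            diff = vsz;              /* op0 >= op1: condition always true */
        }
    }
    return diff < vsz ? diff : vsz;  /* bound to the vector length */
}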
3773
ed491961
RH
3774/*
3775 *** SVE Integer Wide Immediate - Unpredicated Group
3776 */
3777
3a7be554 3778static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3779{
3780 if (a->esz == 0) {
3781 return false;
3782 }
3783 if (sve_access_check(s)) {
3784 unsigned vsz = vec_full_reg_size(s);
3785 int dofs = vec_full_reg_offset(s, a->rd);
3786 uint64_t imm;
3787
3788 /* Decode the VFP immediate. */
3789 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3790 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3791 }
3792 return true;
3793}
3794
3a7be554 3795static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3796{
3a7be554 3797 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
ed491961
RH
3798 return false;
3799 }
3800 if (sve_access_check(s)) {
3801 unsigned vsz = vec_full_reg_size(s);
3802 int dofs = vec_full_reg_offset(s, a->rd);
3803
8711e71f 3804 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3805 }
3806 return true;
3807}
3808
3a7be554 3809static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3810{
3a7be554 3811 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3812 return false;
3813 }
3814 if (sve_access_check(s)) {
3815 unsigned vsz = vec_full_reg_size(s);
3816 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3817 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3818 }
3819 return true;
3820}
3821
3a7be554 3822static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3823{
3824 a->imm = -a->imm;
3a7be554 3825 return trans_ADD_zzi(s, a);
6e6a157d
RH
3826}
3827
3a7be554 3828static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3829{
53229a77 3830 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
6e6a157d
RH
3831 static const GVecGen2s op[4] = {
3832 { .fni8 = tcg_gen_vec_sub8_i64,
3833 .fniv = tcg_gen_sub_vec,
3834 .fno = gen_helper_sve_subri_b,
53229a77 3835 .opt_opc = vecop_list,
6e6a157d
RH
3836 .vece = MO_8,
3837 .scalar_first = true },
3838 { .fni8 = tcg_gen_vec_sub16_i64,
3839 .fniv = tcg_gen_sub_vec,
3840 .fno = gen_helper_sve_subri_h,
53229a77 3841 .opt_opc = vecop_list,
6e6a157d
RH
3842 .vece = MO_16,
3843 .scalar_first = true },
3844 { .fni4 = tcg_gen_sub_i32,
3845 .fniv = tcg_gen_sub_vec,
3846 .fno = gen_helper_sve_subri_s,
53229a77 3847 .opt_opc = vecop_list,
6e6a157d
RH
3848 .vece = MO_32,
3849 .scalar_first = true },
3850 { .fni8 = tcg_gen_sub_i64,
3851 .fniv = tcg_gen_sub_vec,
3852 .fno = gen_helper_sve_subri_d,
53229a77 3853 .opt_opc = vecop_list,
6e6a157d
RH
3854 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3855 .vece = MO_64,
3856 .scalar_first = true }
3857 };
3858
3a7be554 3859 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3860 return false;
3861 }
3862 if (sve_access_check(s)) {
3863 unsigned vsz = vec_full_reg_size(s);
3864 TCGv_i64 c = tcg_const_i64(a->imm);
3865 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3866 vec_full_reg_offset(s, a->rn),
3867 vsz, vsz, c, &op[a->esz]);
3868 tcg_temp_free_i64(c);
3869 }
3870 return true;
3871}
3872
3a7be554 3873static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3874{
3875 if (sve_access_check(s)) {
3876 unsigned vsz = vec_full_reg_size(s);
3877 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3878 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3879 }
3880 return true;
3881}
3882
3a7be554 3883static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3884{
3a7be554 3885 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3886 return false;
3887 }
3888 if (sve_access_check(s)) {
3889 TCGv_i64 val = tcg_const_i64(a->imm);
3890 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3891 tcg_temp_free_i64(val);
3892 }
3893 return true;
3894}
3895
3a7be554 3896static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3897{
3a7be554 3898 return do_zzi_sat(s, a, false, false);
6e6a157d
RH
3899}
3900
3a7be554 3901static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3902{
3a7be554 3903 return do_zzi_sat(s, a, true, false);
6e6a157d
RH
3904}
3905
3a7be554 3906static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3907{
3a7be554 3908 return do_zzi_sat(s, a, false, true);
6e6a157d
RH
3909}
3910
3a7be554 3911static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3912{
3a7be554 3913 return do_zzi_sat(s, a, true, true);
6e6a157d
RH
3914}
3915
3916static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3917{
3918 if (sve_access_check(s)) {
3919 unsigned vsz = vec_full_reg_size(s);
3920 TCGv_i64 c = tcg_const_i64(a->imm);
3921
3922 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3923 vec_full_reg_offset(s, a->rn),
3924 c, vsz, vsz, 0, fn);
3925 tcg_temp_free_i64(c);
3926 }
3927 return true;
3928}
3929
3930#define DO_ZZI(NAME, name) \
3a7be554 3931static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
6e6a157d
RH
3932{ \
3933 static gen_helper_gvec_2i * const fns[4] = { \
3934 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3935 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3936 }; \
3937 return do_zzi_ool(s, a, fns[a->esz]); \
3938}
3939
3940DO_ZZI(SMAX, smax)
3941DO_ZZI(UMAX, umax)
3942DO_ZZI(SMIN, smin)
3943DO_ZZI(UMIN, umin)
3944
3945#undef DO_ZZI
3946
bc2bd697 3947static bool trans_DOT_zzzz(DisasContext *s, arg_DOT_zzzz *a)
d730ecaa 3948{
bc2bd697 3949 static gen_helper_gvec_4 * const fns[2][2] = {
d730ecaa
RH
3950 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3951 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3952 };
3953
3954 if (sve_access_check(s)) {
bc2bd697 3955 gen_gvec_ool_zzzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0);
d730ecaa
RH
3956 }
3957 return true;
3958}
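/*
 * For the dot products above, a->u selects signed vs unsigned and a->sz
 * selects the pairing: the *_b helpers accumulate four byte products into
 * each 32-bit lane, while the *_h helpers accumulate four halfword
 * products into each 64-bit lane.  The final 0 is the gvec data field,
 * unused by these non-indexed forms.
 */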
3959
814d4c52
RH
3960/*
3961 * SVE Multiply - Indexed
3962 */
3963
0a82d963
RH
3964static bool do_zzxz_ool(DisasContext *s, arg_rrxr_esz *a,
3965 gen_helper_gvec_4 *fn)
16fcfdc7 3966{
0a82d963
RH
3967 if (fn == NULL) {
3968 return false;
3969 }
16fcfdc7 3970 if (sve_access_check(s)) {
0a82d963 3971 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
16fcfdc7
RH
3972 }
3973 return true;
3974}
3975
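/*
 * For the indexed (zzxw) forms expanded below, a->index is passed through
 * as the gvec data field; the helper uses it to select one 32-bit (byte
 * dot) or 64-bit (halfword dot) group of Zm within each 128-bit segment
 * and reuses that group for every lane of the segment, per the SVE
 * "by element" semantics.
 */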
0a82d963
RH
3976#define DO_RRXR(NAME, FUNC) \
3977 static bool NAME(DisasContext *s, arg_rrxr_esz *a) \
3978 { return do_zzxz_ool(s, a, FUNC); }
3979
3980DO_RRXR(trans_SDOT_zzxw_s, gen_helper_gvec_sdot_idx_b)
3981DO_RRXR(trans_SDOT_zzxw_d, gen_helper_gvec_sdot_idx_h)
3982DO_RRXR(trans_UDOT_zzxw_s, gen_helper_gvec_udot_idx_b)
3983DO_RRXR(trans_UDOT_zzxw_d, gen_helper_gvec_udot_idx_h)
3984
2867039a
RH
3985static bool trans_SUDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
3986{
3987 if (!dc_isar_feature(aa64_sve_i8mm, s)) {
3988 return false;
3989 }
3990 return do_zzxz_ool(s, a, gen_helper_gvec_sudot_idx_b);
3991}
3992
3993static bool trans_USDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
3994{
3995 if (!dc_isar_feature(aa64_sve_i8mm, s)) {
3996 return false;
3997 }
3998 return do_zzxz_ool(s, a, gen_helper_gvec_usdot_idx_b);
3999}
4000
0a82d963 4001#undef DO_RRXR
16fcfdc7 4002
814d4c52
RH
4003static bool do_sve2_zzz_data(DisasContext *s, int rd, int rn, int rm, int data,
4004 gen_helper_gvec_3 *fn)
4005{
4006 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
4007 return false;
4008 }
4009 if (sve_access_check(s)) {
4010 unsigned vsz = vec_full_reg_size(s);
4011 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
4012 vec_full_reg_offset(s, rn),
4013 vec_full_reg_offset(s, rm),
4014 vsz, vsz, data, fn);
4015 }
4016 return true;
4017}
4018
4019#define DO_SVE2_RRX(NAME, FUNC) \
4020 static bool NAME(DisasContext *s, arg_rrx_esz *a) \
4021 { return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, a->index, FUNC); }
4022
4023DO_SVE2_RRX(trans_MUL_zzx_h, gen_helper_gvec_mul_idx_h)
4024DO_SVE2_RRX(trans_MUL_zzx_s, gen_helper_gvec_mul_idx_s)
4025DO_SVE2_RRX(trans_MUL_zzx_d, gen_helper_gvec_mul_idx_d)
4026
1aee2d70
RH
4027DO_SVE2_RRX(trans_SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
4028DO_SVE2_RRX(trans_SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
4029DO_SVE2_RRX(trans_SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
4030
4031DO_SVE2_RRX(trans_SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
4032DO_SVE2_RRX(trans_SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
4033DO_SVE2_RRX(trans_SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
4034
814d4c52
RH
4035#undef DO_SVE2_RRX
4036
b95f5eeb
RH
4037#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
4038 static bool NAME(DisasContext *s, arg_rrx_esz *a) \
4039 { \
4040 return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, \
4041 (a->index << 1) | TOP, FUNC); \
4042 }
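/*
 * The widening "bottom/top" forms pack two decode fields into the single
 * gvec data word: bit 0 is TOP (operate on the odd-numbered source
 * elements of each pair instead of the even-numbered ones) and the
 * remaining bits are the Zm element index, which the helper is expected
 * to unpack again.
 */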
4043
4044DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
4045DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
4046DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
4047DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
4048
d3949c4c
RH
4049DO_SVE2_RRX_TB(trans_SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
4050DO_SVE2_RRX_TB(trans_SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
4051DO_SVE2_RRX_TB(trans_SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
4052DO_SVE2_RRX_TB(trans_SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
4053
4054DO_SVE2_RRX_TB(trans_UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
4055DO_SVE2_RRX_TB(trans_UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
4056DO_SVE2_RRX_TB(trans_UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
4057DO_SVE2_RRX_TB(trans_UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
4058
b95f5eeb
RH
4059#undef DO_SVE2_RRX_TB
4060
8a02aac7
RH
4061static bool do_sve2_zzzz_data(DisasContext *s, int rd, int rn, int rm, int ra,
4062 int data, gen_helper_gvec_4 *fn)
4063{
4064 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
4065 return false;
4066 }
4067 if (sve_access_check(s)) {
4068 unsigned vsz = vec_full_reg_size(s);
4069 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
4070 vec_full_reg_offset(s, rn),
4071 vec_full_reg_offset(s, rm),
4072 vec_full_reg_offset(s, ra),
4073 vsz, vsz, data, fn);
4074 }
4075 return true;
4076}
4077
4078#define DO_SVE2_RRXR(NAME, FUNC) \
4079 static bool NAME(DisasContext *s, arg_rrxr_esz *a) \
4080 { return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, a->index, FUNC); }
4081
4082DO_SVE2_RRXR(trans_MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
4083DO_SVE2_RRXR(trans_MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
4084DO_SVE2_RRXR(trans_MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
4085
4086DO_SVE2_RRXR(trans_MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
4087DO_SVE2_RRXR(trans_MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
4088DO_SVE2_RRXR(trans_MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
4089
75d6d5fc
RH
4090DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
4091DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
4092DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
4093
4094DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
4095DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
4096DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
4097
8a02aac7
RH
4098#undef DO_SVE2_RRXR
4099
c5c455d7
RH
4100#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
4101 static bool NAME(DisasContext *s, arg_rrxr_esz *a) \
4102 { \
4103 return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->rd, \
4104 (a->index << 1) | TOP, FUNC); \
4105 }
4106
4107DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
4108DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
4109DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
4110DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
4111
4112DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
4113DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
4114DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
4115DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
d462469f
RH
4116
4117DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
4118DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
4119DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
4120DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
4121
4122DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
4123DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
4124DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
4125DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
4126
4127DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
4128DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
4129DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
4130DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
4131
4132DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
4133DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
4134DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
4135DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
c5c455d7
RH
4136
4137#undef DO_SVE2_RRXR_TB
4138
3b787ed8
RH
4139#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
4140 static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
4141 { \
4142 return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, \
4143 (a->index << 2) | a->rot, FUNC); \
4144 }
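/*
 * Similarly for the complex indexed forms: the 2-bit rot field (a rotation
 * in multiples of 90 degrees) is packed into bits [1:0] of the data word
 * with the element index above it, matching what the cmla/sqrdcmlah/cdot
 * index helpers expect to unpack.
 */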
4145
4146DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
4147DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)
4148
4149DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
4150DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)
4151
21068f39
RH
4152DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
4153DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
4154
3b787ed8
RH
4155#undef DO_SVE2_RRXR_ROT
4156
ca40a6e6
RH
4157/*
4158 *** SVE Floating Point Multiply-Add Indexed Group
4159 */
4160
0a82d963 4161static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
ca40a6e6
RH
4162{
4163 static gen_helper_gvec_4_ptr * const fns[3] = {
4164 gen_helper_gvec_fmla_idx_h,
4165 gen_helper_gvec_fmla_idx_s,
4166 gen_helper_gvec_fmla_idx_d,
4167 };
4168
4169 if (sve_access_check(s)) {
4170 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4171 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
4172 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4173 vec_full_reg_offset(s, a->rn),
4174 vec_full_reg_offset(s, a->rm),
4175 vec_full_reg_offset(s, a->ra),
0a82d963 4176 status, vsz, vsz, (a->index << 1) | sub,
ca40a6e6
RH
4177 fns[a->esz - 1]);
4178 tcg_temp_free_ptr(status);
4179 }
4180 return true;
4181}
4182
0a82d963
RH
4183static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
4184{
4185 return do_FMLA_zzxz(s, a, false);
4186}
4187
4188static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
4189{
4190 return do_FMLA_zzxz(s, a, true);
4191}
4192
ca40a6e6
RH
4193/*
4194 *** SVE Floating Point Multiply Indexed Group
4195 */
4196
3a7be554 4197static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
ca40a6e6
RH
4198{
4199 static gen_helper_gvec_3_ptr * const fns[3] = {
4200 gen_helper_gvec_fmul_idx_h,
4201 gen_helper_gvec_fmul_idx_s,
4202 gen_helper_gvec_fmul_idx_d,
4203 };
4204
4205 if (sve_access_check(s)) {
4206 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4207 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
4208 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4209 vec_full_reg_offset(s, a->rn),
4210 vec_full_reg_offset(s, a->rm),
4211 status, vsz, vsz, a->index, fns[a->esz - 1]);
4212 tcg_temp_free_ptr(status);
4213 }
4214 return true;
4215}
4216
23fbe79f
RH
4217/*
4218 *** SVE Floating Point Fast Reduction Group
4219 */
4220
4221typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
4222 TCGv_ptr, TCGv_i32);
4223
4224static void do_reduce(DisasContext *s, arg_rpr_esz *a,
4225 gen_helper_fp_reduce *fn)
4226{
4227 unsigned vsz = vec_full_reg_size(s);
4228 unsigned p2vsz = pow2ceil(vsz);
c648c9b7 4229 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
23fbe79f
RH
4230 TCGv_ptr t_zn, t_pg, status;
4231 TCGv_i64 temp;
4232
4233 temp = tcg_temp_new_i64();
4234 t_zn = tcg_temp_new_ptr();
4235 t_pg = tcg_temp_new_ptr();
4236
4237 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
4238 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 4239 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
23fbe79f
RH
4240
4241 fn(temp, t_zn, t_pg, status, t_desc);
4242 tcg_temp_free_ptr(t_zn);
4243 tcg_temp_free_ptr(t_pg);
4244 tcg_temp_free_ptr(status);
4245 tcg_temp_free_i32(t_desc);
4246
4247 write_fp_dreg(s, a->rd, temp);
4248 tcg_temp_free_i64(temp);
4249}
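/*
 * do_reduce() passes pow2ceil(vsz) in the descriptor data field; the
 * fp-reduction helpers presumably use it to run a pairwise reduction over
 * a power-of-two number of slots, filling inactive lanes and the padding
 * beyond vsz with the operation's identity value, so the evaluation order
 * is fixed regardless of vector length or predicate contents.
 */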
4250
4251#define DO_VPZ(NAME, name) \
3a7be554 4252static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
23fbe79f
RH
4253{ \
4254 static gen_helper_fp_reduce * const fns[3] = { \
4255 gen_helper_sve_##name##_h, \
4256 gen_helper_sve_##name##_s, \
4257 gen_helper_sve_##name##_d, \
4258 }; \
4259 if (a->esz == 0) { \
4260 return false; \
4261 } \
4262 if (sve_access_check(s)) { \
4263 do_reduce(s, a, fns[a->esz - 1]); \
4264 } \
4265 return true; \
4266}
4267
4268DO_VPZ(FADDV, faddv)
4269DO_VPZ(FMINNMV, fminnmv)
4270DO_VPZ(FMAXNMV, fmaxnmv)
4271DO_VPZ(FMINV, fminv)
4272DO_VPZ(FMAXV, fmaxv)
4273
3887c038
RH
4274/*
4275 *** SVE Floating Point Unary Operations - Unpredicated Group
4276 */
4277
4278static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
4279{
4280 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4281 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
4282
4283 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4284 vec_full_reg_offset(s, a->rn),
4285 status, vsz, vsz, 0, fn);
4286 tcg_temp_free_ptr(status);
4287}
4288
3a7be554 4289static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
4290{
4291 static gen_helper_gvec_2_ptr * const fns[3] = {
4292 gen_helper_gvec_frecpe_h,
4293 gen_helper_gvec_frecpe_s,
4294 gen_helper_gvec_frecpe_d,
4295 };
4296 if (a->esz == 0) {
4297 return false;
4298 }
4299 if (sve_access_check(s)) {
4300 do_zz_fp(s, a, fns[a->esz - 1]);
4301 }
4302 return true;
4303}
4304
3a7be554 4305static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
4306{
4307 static gen_helper_gvec_2_ptr * const fns[3] = {
4308 gen_helper_gvec_frsqrte_h,
4309 gen_helper_gvec_frsqrte_s,
4310 gen_helper_gvec_frsqrte_d,
4311 };
4312 if (a->esz == 0) {
4313 return false;
4314 }
4315 if (sve_access_check(s)) {
4316 do_zz_fp(s, a, fns[a->esz - 1]);
4317 }
4318 return true;
4319}
4320
4d2e2a03
RH
4321/*
4322 *** SVE Floating Point Compare with Zero Group
4323 */
4324
4325static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4326 gen_helper_gvec_3_ptr *fn)
4327{
4328 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4329 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
4330
4331 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
4332 vec_full_reg_offset(s, a->rn),
4333 pred_full_reg_offset(s, a->pg),
4334 status, vsz, vsz, 0, fn);
4335 tcg_temp_free_ptr(status);
4336}
4337
4338#define DO_PPZ(NAME, name) \
3a7be554 4339static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
4d2e2a03
RH
4340{ \
4341 static gen_helper_gvec_3_ptr * const fns[3] = { \
4342 gen_helper_sve_##name##_h, \
4343 gen_helper_sve_##name##_s, \
4344 gen_helper_sve_##name##_d, \
4345 }; \
4346 if (a->esz == 0) { \
4347 return false; \
4348 } \
4349 if (sve_access_check(s)) { \
4350 do_ppz_fp(s, a, fns[a->esz - 1]); \
4351 } \
4352 return true; \
4353}
4354
4355DO_PPZ(FCMGE_ppz0, fcmge0)
4356DO_PPZ(FCMGT_ppz0, fcmgt0)
4357DO_PPZ(FCMLE_ppz0, fcmle0)
4358DO_PPZ(FCMLT_ppz0, fcmlt0)
4359DO_PPZ(FCMEQ_ppz0, fcmeq0)
4360DO_PPZ(FCMNE_ppz0, fcmne0)
4361
4362#undef DO_PPZ
4363
67fcd9ad
RH
4364/*
4365 *** SVE floating-point trig multiply-add coefficient
4366 */
4367
3a7be554 4368static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
4369{
4370 static gen_helper_gvec_3_ptr * const fns[3] = {
4371 gen_helper_sve_ftmad_h,
4372 gen_helper_sve_ftmad_s,
4373 gen_helper_sve_ftmad_d,
4374 };
4375
4376 if (a->esz == 0) {
4377 return false;
4378 }
4379 if (sve_access_check(s)) {
4380 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4381 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
4382 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4383 vec_full_reg_offset(s, a->rn),
4384 vec_full_reg_offset(s, a->rm),
4385 status, vsz, vsz, a->imm, fns[a->esz - 1]);
4386 tcg_temp_free_ptr(status);
4387 }
4388 return true;
4389}
4390
7f9ddf64
RH
4391/*
4392 *** SVE Floating Point Accumulating Reduction Group
4393 */
4394
3a7be554 4395static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
4396{
4397 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
4398 TCGv_ptr, TCGv_ptr, TCGv_i32);
4399 static fadda_fn * const fns[3] = {
4400 gen_helper_sve_fadda_h,
4401 gen_helper_sve_fadda_s,
4402 gen_helper_sve_fadda_d,
4403 };
4404 unsigned vsz = vec_full_reg_size(s);
4405 TCGv_ptr t_rm, t_pg, t_fpst;
4406 TCGv_i64 t_val;
4407 TCGv_i32 t_desc;
4408
4409 if (a->esz == 0) {
4410 return false;
4411 }
4412 if (!sve_access_check(s)) {
4413 return true;
4414 }
4415
4416 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
4417 t_rm = tcg_temp_new_ptr();
4418 t_pg = tcg_temp_new_ptr();
4419 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
4420 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 4421 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7f9ddf64
RH
4422 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
4423
4424 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
4425
4426 tcg_temp_free_i32(t_desc);
4427 tcg_temp_free_ptr(t_fpst);
4428 tcg_temp_free_ptr(t_pg);
4429 tcg_temp_free_ptr(t_rm);
4430
4431 write_fp_dreg(s, a->rd, t_val);
4432 tcg_temp_free_i64(t_val);
4433 return true;
4434}
4435
29b80469
RH
4436/*
4437 *** SVE Floating Point Arithmetic - Unpredicated Group
4438 */
4439
4440static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
4441 gen_helper_gvec_3_ptr *fn)
4442{
4443 if (fn == NULL) {
4444 return false;
4445 }
4446 if (sve_access_check(s)) {
4447 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4448 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
4449 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4450 vec_full_reg_offset(s, a->rn),
4451 vec_full_reg_offset(s, a->rm),
4452 status, vsz, vsz, 0, fn);
4453 tcg_temp_free_ptr(status);
4454 }
4455 return true;
4456}
4457
4458
4459#define DO_FP3(NAME, name) \
3a7be554 4460static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
4461{ \
4462 static gen_helper_gvec_3_ptr * const fns[4] = { \
4463 NULL, gen_helper_gvec_##name##_h, \
4464 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
4465 }; \
4466 return do_zzz_fp(s, a, fns[a->esz]); \
4467}
4468
4469DO_FP3(FADD_zzz, fadd)
4470DO_FP3(FSUB_zzz, fsub)
4471DO_FP3(FMUL_zzz, fmul)
4472DO_FP3(FTSMUL, ftsmul)
4473DO_FP3(FRECPS, recps)
4474DO_FP3(FRSQRTS, rsqrts)
4475
4476#undef DO_FP3
4477
ec3b87c2
RH
4478/*
4479 *** SVE Floating Point Arithmetic - Predicated Group
4480 */
4481
4482static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
4483 gen_helper_gvec_4_ptr *fn)
4484{
4485 if (fn == NULL) {
4486 return false;
4487 }
4488 if (sve_access_check(s)) {
4489 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4490 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
4491 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4492 vec_full_reg_offset(s, a->rn),
4493 vec_full_reg_offset(s, a->rm),
4494 pred_full_reg_offset(s, a->pg),
4495 status, vsz, vsz, 0, fn);
4496 tcg_temp_free_ptr(status);
4497 }
4498 return true;
4499}
4500
4501#define DO_FP3(NAME, name) \
3a7be554 4502static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
4503{ \
4504 static gen_helper_gvec_4_ptr * const fns[4] = { \
4505 NULL, gen_helper_sve_##name##_h, \
4506 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4507 }; \
4508 return do_zpzz_fp(s, a, fns[a->esz]); \
4509}
4510
4511DO_FP3(FADD_zpzz, fadd)
4512DO_FP3(FSUB_zpzz, fsub)
4513DO_FP3(FMUL_zpzz, fmul)
4514DO_FP3(FMIN_zpzz, fmin)
4515DO_FP3(FMAX_zpzz, fmax)
4516DO_FP3(FMINNM_zpzz, fminnum)
4517DO_FP3(FMAXNM_zpzz, fmaxnum)
4518DO_FP3(FABD, fabd)
4519DO_FP3(FSCALE, fscalbn)
4520DO_FP3(FDIV, fdiv)
4521DO_FP3(FMULX, fmulx)
4522
4523#undef DO_FP3
8092c6a3 4524
cc48affe
RH
4525typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
4526 TCGv_i64, TCGv_ptr, TCGv_i32);
4527
4528static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
4529 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
4530{
4531 unsigned vsz = vec_full_reg_size(s);
4532 TCGv_ptr t_zd, t_zn, t_pg, status;
4533 TCGv_i32 desc;
4534
4535 t_zd = tcg_temp_new_ptr();
4536 t_zn = tcg_temp_new_ptr();
4537 t_pg = tcg_temp_new_ptr();
4538 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
4539 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
4540 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4541
cdfb22bb 4542 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
cc48affe
RH
4543 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
4544 fn(t_zd, t_zn, t_pg, scalar, status, desc);
4545
4546 tcg_temp_free_i32(desc);
4547 tcg_temp_free_ptr(status);
4548 tcg_temp_free_ptr(t_pg);
4549 tcg_temp_free_ptr(t_zn);
4550 tcg_temp_free_ptr(t_zd);
4551}
4552
4553static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4554 gen_helper_sve_fp2scalar *fn)
4555{
4556 TCGv_i64 temp = tcg_const_i64(imm);
4557 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
4558 tcg_temp_free_i64(temp);
4559}
4560
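/*
 * The FP immediate forms below encode only a single immediate bit: each
 * instruction chooses between two fixed constants, e.g. FADD adds 0.5 or
 * 1.0, FMUL multiplies by 0.5 or 2.0, and FMAXNM/FMINNM/FMAX/FMIN compare
 * against 0.0 or 1.0.  The val[][] table in the macro materialises those
 * pairs in the element format selected by a->esz.
 */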
4561#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 4562static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
4563{ \
4564 static gen_helper_sve_fp2scalar * const fns[3] = { \
4565 gen_helper_sve_##name##_h, \
4566 gen_helper_sve_##name##_s, \
4567 gen_helper_sve_##name##_d \
4568 }; \
4569 static uint64_t const val[3][2] = { \
4570 { float16_##const0, float16_##const1 }, \
4571 { float32_##const0, float32_##const1 }, \
4572 { float64_##const0, float64_##const1 }, \
4573 }; \
4574 if (a->esz == 0) { \
4575 return false; \
4576 } \
4577 if (sve_access_check(s)) { \
4578 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
4579 } \
4580 return true; \
4581}
4582
cc48affe
RH
4583DO_FP_IMM(FADD, fadds, half, one)
4584DO_FP_IMM(FSUB, fsubs, half, one)
4585DO_FP_IMM(FMUL, fmuls, half, two)
4586DO_FP_IMM(FSUBR, fsubrs, half, one)
4587DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
4588DO_FP_IMM(FMINNM, fminnms, zero, one)
4589DO_FP_IMM(FMAX, fmaxs, zero, one)
4590DO_FP_IMM(FMIN, fmins, zero, one)
4591
4592#undef DO_FP_IMM
4593
abfdefd5
RH
4594static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4595 gen_helper_gvec_4_ptr *fn)
4596{
4597 if (fn == NULL) {
4598 return false;
4599 }
4600 if (sve_access_check(s)) {
4601 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4602 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4603 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4604 vec_full_reg_offset(s, a->rn),
4605 vec_full_reg_offset(s, a->rm),
4606 pred_full_reg_offset(s, a->pg),
4607 status, vsz, vsz, 0, fn);
4608 tcg_temp_free_ptr(status);
4609 }
4610 return true;
4611}
4612
4613#define DO_FPCMP(NAME, name) \
3a7be554 4614static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
4615{ \
4616 static gen_helper_gvec_4_ptr * const fns[4] = { \
4617 NULL, gen_helper_sve_##name##_h, \
4618 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4619 }; \
4620 return do_fp_cmp(s, a, fns[a->esz]); \
4621}
4622
4623DO_FPCMP(FCMGE, fcmge)
4624DO_FPCMP(FCMGT, fcmgt)
4625DO_FPCMP(FCMEQ, fcmeq)
4626DO_FPCMP(FCMNE, fcmne)
4627DO_FPCMP(FCMUO, fcmuo)
4628DO_FPCMP(FACGE, facge)
4629DO_FPCMP(FACGT, facgt)
4630
4631#undef DO_FPCMP
4632
3a7be554 4633static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
4634{
4635 static gen_helper_gvec_4_ptr * const fns[3] = {
4636 gen_helper_sve_fcadd_h,
4637 gen_helper_sve_fcadd_s,
4638 gen_helper_sve_fcadd_d
4639 };
4640
4641 if (a->esz == 0) {
4642 return false;
4643 }
4644 if (sve_access_check(s)) {
4645 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4646 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
4647 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4648 vec_full_reg_offset(s, a->rn),
4649 vec_full_reg_offset(s, a->rm),
4650 pred_full_reg_offset(s, a->pg),
4651 status, vsz, vsz, a->rot, fns[a->esz - 1]);
4652 tcg_temp_free_ptr(status);
4653 }
4654 return true;
4655}
4656
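/*
 * For FCADD above, a->rot is a single decode bit passed straight through
 * as gvec data; the helper should interpret it as a rotation of 90 or 270
 * degrees.  The FCMLA forms further down use a 2-bit rot (multiples of
 * 90 degrees), and the indexed variant folds it in as a->index * 4 + a->rot.
 */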
08975da9
RH
4657static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
4658 gen_helper_gvec_5_ptr *fn)
6ceabaad 4659{
08975da9 4660 if (a->esz == 0) {
6ceabaad
RH
4661 return false;
4662 }
08975da9
RH
4663 if (sve_access_check(s)) {
4664 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4665 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4666 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4667 vec_full_reg_offset(s, a->rn),
4668 vec_full_reg_offset(s, a->rm),
4669 vec_full_reg_offset(s, a->ra),
4670 pred_full_reg_offset(s, a->pg),
4671 status, vsz, vsz, 0, fn);
4672 tcg_temp_free_ptr(status);
6ceabaad 4673 }
6ceabaad
RH
4674 return true;
4675}
4676
4677#define DO_FMLA(NAME, name) \
3a7be554 4678static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 4679{ \
08975da9 4680 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
4681 NULL, gen_helper_sve_##name##_h, \
4682 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4683 }; \
4684 return do_fmla(s, a, fns[a->esz]); \
4685}
4686
4687DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4688DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4689DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4690DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4691
4692#undef DO_FMLA
4693
3a7be554 4694static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 4695{
08975da9
RH
4696 static gen_helper_gvec_5_ptr * const fns[4] = {
4697 NULL,
05f48bab
RH
4698 gen_helper_sve_fcmla_zpzzz_h,
4699 gen_helper_sve_fcmla_zpzzz_s,
4700 gen_helper_sve_fcmla_zpzzz_d,
4701 };
4702
4703 if (a->esz == 0) {
4704 return false;
4705 }
4706 if (sve_access_check(s)) {
4707 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4708 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4709 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4710 vec_full_reg_offset(s, a->rn),
4711 vec_full_reg_offset(s, a->rm),
4712 vec_full_reg_offset(s, a->ra),
4713 pred_full_reg_offset(s, a->pg),
4714 status, vsz, vsz, a->rot, fns[a->esz]);
4715 tcg_temp_free_ptr(status);
05f48bab
RH
4716 }
4717 return true;
4718}
4719
3a7be554 4720static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405 4721{
636ddeb1 4722 static gen_helper_gvec_4_ptr * const fns[2] = {
18fc2405
RH
4723 gen_helper_gvec_fcmlah_idx,
4724 gen_helper_gvec_fcmlas_idx,
4725 };
4726
4727 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4728 tcg_debug_assert(a->rd == a->ra);
4729 if (sve_access_check(s)) {
4730 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4731 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
636ddeb1 4732 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
18fc2405
RH
4733 vec_full_reg_offset(s, a->rn),
4734 vec_full_reg_offset(s, a->rm),
636ddeb1 4735 vec_full_reg_offset(s, a->ra),
18fc2405
RH
4736 status, vsz, vsz,
4737 a->index * 4 + a->rot,
4738 fns[a->esz - 1]);
4739 tcg_temp_free_ptr(status);
4740 }
4741 return true;
4742}
4743
8092c6a3
RH
4744/*
4745 *** SVE Floating Point Unary Operations Predicated Group
4746 */
4747
4748static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4749 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4750{
4751 if (sve_access_check(s)) {
4752 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4753 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
4754 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4755 vec_full_reg_offset(s, rn),
4756 pred_full_reg_offset(s, pg),
4757 status, vsz, vsz, 0, fn);
4758 tcg_temp_free_ptr(status);
4759 }
4760 return true;
4761}
4762
3a7be554 4763static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4764{
e4ab5124 4765 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
4766}
4767
3a7be554 4768static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4769{
4770 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4771}
4772
d29b17ca
RH
4773static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
4774{
4775 if (!dc_isar_feature(aa64_sve_bf16, s)) {
4776 return false;
4777 }
4778 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
4779}
4780
3a7be554 4781static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4782{
e4ab5124 4783 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
4784}
4785
3a7be554 4786static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4787{
4788 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4789}
4790
3a7be554 4791static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4792{
4793 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4794}
4795
3a7be554 4796static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4797{
4798 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4799}
4800
3a7be554 4801static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4802{
4803 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4804}
4805
3a7be554 4806static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4807{
4808 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4809}
4810
3a7be554 4811static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4812{
4813 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4814}
4815
3a7be554 4816static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4817{
4818 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4819}
4820
3a7be554 4821static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4822{
4823 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4824}
4825
3a7be554 4826static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4827{
4828 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4829}
4830
3a7be554 4831static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4832{
4833 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4834}
4835
3a7be554 4836static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4837{
4838 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4839}
4840
3a7be554 4841static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4842{
4843 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4844}
4845
3a7be554 4846static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4847{
4848 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4849}
4850
3a7be554 4851static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4852{
4853 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4854}
4855
3a7be554 4856static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4857{
4858 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4859}
4860
3a7be554 4861static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4862{
4863 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4864}
4865
3a7be554 4866static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4867{
4868 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4869}
4870
cda3c753
RH
4871static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4872 gen_helper_sve_frint_h,
4873 gen_helper_sve_frint_s,
4874 gen_helper_sve_frint_d
4875};
4876
3a7be554 4877static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4878{
4879 if (a->esz == 0) {
4880 return false;
4881 }
4882 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4883 frint_fns[a->esz - 1]);
4884}
4885
3a7be554 4886static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4887{
4888 static gen_helper_gvec_3_ptr * const fns[3] = {
4889 gen_helper_sve_frintx_h,
4890 gen_helper_sve_frintx_s,
4891 gen_helper_sve_frintx_d
4892 };
4893 if (a->esz == 0) {
4894 return false;
4895 }
4896 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4897}
4898
95365277
SL
4899static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4900 int mode, gen_helper_gvec_3_ptr *fn)
cda3c753 4901{
cda3c753
RH
4902 if (sve_access_check(s)) {
4903 unsigned vsz = vec_full_reg_size(s);
4904 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4905 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4906
4907 gen_helper_set_rmode(tmode, tmode, status);
4908
4909 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4910 vec_full_reg_offset(s, a->rn),
4911 pred_full_reg_offset(s, a->pg),
95365277 4912 status, vsz, vsz, 0, fn);
cda3c753
RH
4913
4914 gen_helper_set_rmode(tmode, tmode, status);
4915 tcg_temp_free_i32(tmode);
4916 tcg_temp_free_ptr(status);
4917 }
4918 return true;
4919}
4920
3a7be554 4921static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753 4922{
95365277
SL
4923 if (a->esz == 0) {
4924 return false;
4925 }
4926 return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
cda3c753
RH
4927}
4928
3a7be554 4929static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753 4930{
95365277
SL
4931 if (a->esz == 0) {
4932 return false;
4933 }
4934 return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
cda3c753
RH
4935}
4936
3a7be554 4937static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753 4938{
95365277
SL
4939 if (a->esz == 0) {
4940 return false;
4941 }
4942 return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
cda3c753
RH
4943}
4944
3a7be554 4945static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753 4946{
95365277
SL
4947 if (a->esz == 0) {
4948 return false;
4949 }
4950 return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
cda3c753
RH
4951}
4952
3a7be554 4953static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753 4954{
95365277
SL
4955 if (a->esz == 0) {
4956 return false;
4957 }
4958 return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
cda3c753
RH
4959}
4960
3a7be554 4961static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4962{
4963 static gen_helper_gvec_3_ptr * const fns[3] = {
4964 gen_helper_sve_frecpx_h,
4965 gen_helper_sve_frecpx_s,
4966 gen_helper_sve_frecpx_d
4967 };
4968 if (a->esz == 0) {
4969 return false;
4970 }
4971 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4972}
4973
3a7be554 4974static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4975{
4976 static gen_helper_gvec_3_ptr * const fns[3] = {
4977 gen_helper_sve_fsqrt_h,
4978 gen_helper_sve_fsqrt_s,
4979 gen_helper_sve_fsqrt_d
4980 };
4981 if (a->esz == 0) {
4982 return false;
4983 }
4984 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4985}
4986
3a7be554 4987static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4988{
4989 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4990}
4991
3a7be554 4992static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4993{
4994 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4995}
4996
3a7be554 4997static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4998{
4999 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
5000}
5001
3a7be554 5002static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5003{
5004 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
5005}
5006
3a7be554 5007static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5008{
5009 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
5010}
5011
3a7be554 5012static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5013{
5014 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
5015}
5016
3a7be554 5017static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5018{
5019 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
5020}
5021
3a7be554 5022static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5023{
5024 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
5025}
5026
3a7be554 5027static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5028{
5029 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
5030}
5031
3a7be554 5032static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5033{
5034 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
5035}
5036
3a7be554 5037static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5038{
5039 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
5040}
5041
3a7be554 5042static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5043{
5044 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
5045}
5046
3a7be554 5047static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5048{
5049 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
5050}
5051
3a7be554 5052static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5053{
5054 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
5055}
5056
d1822297
RH
5057/*
5058 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
5059 */
5060
5061/* Subroutine loading a vector register at VOFS of LEN bytes.
5062 * The load should begin at the address Rn + IMM.
5063 */
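/*
 * LEN is a whole vector or predicate register's worth of bytes, so it is a
 * multiple of 8 for vectors and a multiple of 2 for predicates.  The body
 * below copies the 8-byte-aligned portion either unrolled (nparts <= 4) or
 * via a small TCG loop, then mops up a 2-, 4- or 6-byte predicate tail;
 * the 6-byte case is assembled from a 4-byte and a 2-byte load deposited
 * into one 64-bit value.
 */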
5064
19f2acc9 5065static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 5066{
19f2acc9
RH
5067 int len_align = QEMU_ALIGN_DOWN(len, 8);
5068 int len_remain = len % 8;
5069 int nparts = len / 8 + ctpop8(len_remain);
d1822297 5070 int midx = get_mem_index(s);
b2aa8879 5071 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 5072
b2aa8879
RH
5073 dirty_addr = tcg_temp_new_i64();
5074 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 5075 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
b2aa8879 5076 tcg_temp_free_i64(dirty_addr);
d1822297 5077
b2aa8879
RH
5078 /*
5079 * Note that unpredicated load/store of vector/predicate registers
d1822297 5080 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 5081 * operations on larger quantities.
d1822297
RH
5082 * Attempt to keep code expansion to a minimum by limiting the
5083 * amount of unrolling done.
5084 */
5085 if (nparts <= 4) {
5086 int i;
5087
b2aa8879 5088 t0 = tcg_temp_new_i64();
d1822297 5089 for (i = 0; i < len_align; i += 8) {
fc313c64 5090 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
d1822297 5091 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 5092 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 5093 }
b2aa8879 5094 tcg_temp_free_i64(t0);
d1822297
RH
5095 } else {
5096 TCGLabel *loop = gen_new_label();
5097 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5098
b2aa8879
RH
5099 /* Copy the clean address into a local temp, live across the loop. */
5100 t0 = clean_addr;
4b4dc975 5101 clean_addr = new_tmp_a64_local(s);
b2aa8879 5102 tcg_gen_mov_i64(clean_addr, t0);
d1822297 5103
b2aa8879 5104 gen_set_label(loop);
d1822297 5105
b2aa8879 5106 t0 = tcg_temp_new_i64();
fc313c64 5107 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
b2aa8879 5108 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 5109
b2aa8879 5110 tp = tcg_temp_new_ptr();
d1822297
RH
5111 tcg_gen_add_ptr(tp, cpu_env, i);
5112 tcg_gen_addi_ptr(i, i, 8);
5113 tcg_gen_st_i64(t0, tp, vofs);
5114 tcg_temp_free_ptr(tp);
b2aa8879 5115 tcg_temp_free_i64(t0);
d1822297
RH
5116
5117 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
5118 tcg_temp_free_ptr(i);
5119 }
5120
b2aa8879
RH
5121 /*
5122 * Predicate register loads can be any multiple of 2.
d1822297
RH
5123 * Note that we still store the entire 64-bit unit into cpu_env.
5124 */
5125 if (len_remain) {
b2aa8879 5126 t0 = tcg_temp_new_i64();
d1822297
RH
5127 switch (len_remain) {
5128 case 2:
5129 case 4:
5130 case 8:
b2aa8879
RH
5131 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
5132 MO_LE | ctz32(len_remain));
d1822297
RH
5133 break;
5134
5135 case 6:
5136 t1 = tcg_temp_new_i64();
b2aa8879
RH
5137 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
5138 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5139 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
5140 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
5141 tcg_temp_free_i64(t1);
5142 break;
5143
5144 default:
5145 g_assert_not_reached();
5146 }
5147 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 5148 tcg_temp_free_i64(t0);
d1822297 5149 }
d1822297
RH
5150}
5151
5047c204 5152/* Similarly for stores. */
19f2acc9 5153static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 5154{
19f2acc9
RH
5155 int len_align = QEMU_ALIGN_DOWN(len, 8);
5156 int len_remain = len % 8;
5157 int nparts = len / 8 + ctpop8(len_remain);
5047c204 5158 int midx = get_mem_index(s);
bba87d0a 5159 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 5160
bba87d0a
RH
5161 dirty_addr = tcg_temp_new_i64();
5162 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 5163 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
bba87d0a 5164 tcg_temp_free_i64(dirty_addr);
5047c204
RH
5165
5166 /* Note that unpredicated load/store of vector/predicate registers
5167 * are defined as a stream of bytes, which equates to little-endian
5168 * operations on larger quantities. There is no nice way to force
5169 * a little-endian store for aarch64_be-linux-user out of line.
5170 *
5171 * Attempt to keep code expansion to a minimum by limiting the
5172 * amount of unrolling done.
5173 */
5174 if (nparts <= 4) {
5175 int i;
5176
bba87d0a 5177 t0 = tcg_temp_new_i64();
5047c204
RH
5178 for (i = 0; i < len_align; i += 8) {
5179 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
fc313c64 5180 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
d8227b09 5181 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 5182 }
bba87d0a 5183 tcg_temp_free_i64(t0);
5047c204
RH
5184 } else {
5185 TCGLabel *loop = gen_new_label();
bba87d0a 5186 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 5187
bba87d0a
RH
5188 /* Copy the clean address into a local temp, live across the loop. */
5189 t0 = clean_addr;
4b4dc975 5190 clean_addr = new_tmp_a64_local(s);
bba87d0a 5191 tcg_gen_mov_i64(clean_addr, t0);
5047c204 5192
bba87d0a 5193 gen_set_label(loop);
5047c204 5194
bba87d0a
RH
5195 t0 = tcg_temp_new_i64();
5196 tp = tcg_temp_new_ptr();
5197 tcg_gen_add_ptr(tp, cpu_env, i);
5198 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 5199 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
5200 tcg_temp_free_ptr(tp);
5201
fc313c64 5202 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
bba87d0a
RH
5203 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5204 tcg_temp_free_i64(t0);
5047c204
RH
5205
5206 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
5207 tcg_temp_free_ptr(i);
5208 }
5209
5210 /* Predicate register stores can be any multiple of 2. */
5211 if (len_remain) {
bba87d0a 5212 t0 = tcg_temp_new_i64();
5047c204 5213 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
5214
5215 switch (len_remain) {
5216 case 2:
5217 case 4:
5218 case 8:
bba87d0a
RH
5219 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
5220 MO_LE | ctz32(len_remain));
5047c204
RH
5221 break;
5222
5223 case 6:
bba87d0a
RH
5224 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
5225 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 5226 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 5227 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
5228 break;
5229
5230 default:
5231 g_assert_not_reached();
5232 }
bba87d0a 5233 tcg_temp_free_i64(t0);
5047c204 5234 }
5047c204
RH
5235}
5236
3a7be554 5237static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
5238{
5239 if (sve_access_check(s)) {
5240 int size = vec_full_reg_size(s);
5241 int off = vec_full_reg_offset(s, a->rd);
5242 do_ldr(s, off, size, a->rn, a->imm * size);
5243 }
5244 return true;
5245}
5246
3a7be554 5247static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
5248{
5249 if (sve_access_check(s)) {
5250 int size = pred_full_reg_size(s);
5251 int off = pred_full_reg_offset(s, a->rd);
5252 do_ldr(s, off, size, a->rn, a->imm * size);
5253 }
5254 return true;
5255}
c4e7c493 5256
3a7be554 5257static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
5258{
5259 if (sve_access_check(s)) {
5260 int size = vec_full_reg_size(s);
5261 int off = vec_full_reg_offset(s, a->rd);
5262 do_str(s, off, size, a->rn, a->imm * size);
5263 }
5264 return true;
5265}
5266
3a7be554 5267static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
5268{
5269 if (sve_access_check(s)) {
5270 int size = pred_full_reg_size(s);
5271 int off = pred_full_reg_offset(s, a->rd);
5272 do_str(s, off, size, a->rn, a->imm * size);
5273 }
5274 return true;
5275}
5276
c4e7c493
RH
5277/*
5278 *** SVE Memory - Contiguous Load Group
5279 */
5280
5281/* The memory mode of the dtype. */
14776ab5 5282static const MemOp dtype_mop[16] = {
c4e7c493
RH
5283 MO_UB, MO_UB, MO_UB, MO_UB,
5284 MO_SL, MO_UW, MO_UW, MO_UW,
5285 MO_SW, MO_SW, MO_UL, MO_UL,
fc313c64 5286 MO_SB, MO_SB, MO_SB, MO_UQ
c4e7c493
RH
5287};
5288
5289#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
5290
5291/* The vector element size of dtype. */
5292static const uint8_t dtype_esz[16] = {
5293 0, 1, 2, 3,
5294 3, 1, 2, 3,
5295 3, 2, 2, 3,
5296 3, 2, 1, 3
5297};
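/*
 * Both tables are indexed by the 4-bit dtype field.  As a worked example,
 * dtype 1 is LD1B into a halfword vector: dtype_mop[1] is MO_UB (an
 * unsigned byte memory access) and dtype_esz[1] is 1 (16-bit destination
 * elements), which is why row 1 of the ldr_fns table below provides only
 * the single-register gen_helper_sve_ld1bhu_r entry.
 */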
5298
5299static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
5300 int dtype, uint32_t mte_n, bool is_write,
5301 gen_helper_gvec_mem *fn)
c4e7c493
RH
5302{
5303 unsigned vsz = vec_full_reg_size(s);
5304 TCGv_ptr t_pg;
500d0484 5305 TCGv_i32 t_desc;
206adacf 5306 int desc = 0;
c4e7c493 5307
206adacf
RH
5308 /*
5309 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
5310 * registers as pointers, so encode the regno into the data field.
5311 * For consistency, do this even for LD1.
5312 */
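    /*
     * When MTE is active, the MTEDESC fields (mem index, TBI/TCMA bits,
     * write flag and total access size minus one) are assembled in the low
     * bits and then shifted up by SVE_MTEDESC_SHIFT so that they sit above
     * the register number and any other per-insn bits sharing the
     * simd_desc() data field.  When MTE is inactive, the address is
     * instead cleaned of any top-byte tag up front.
     */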
9473d0ec 5313 if (s->mte_active[0]) {
206adacf
RH
5314 int msz = dtype_msz(dtype);
5315
5316 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5317 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5318 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5319 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5320 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
206adacf 5321 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
5322 } else {
5323 addr = clean_data_tbi(s, addr);
206adacf 5324 }
9473d0ec 5325
206adacf 5326 desc = simd_desc(vsz, vsz, zt | desc);
500d0484 5327 t_desc = tcg_const_i32(desc);
c4e7c493
RH
5328 t_pg = tcg_temp_new_ptr();
5329
5330 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
500d0484 5331 fn(cpu_env, t_pg, addr, t_desc);
c4e7c493
RH
5332
5333 tcg_temp_free_ptr(t_pg);
500d0484 5334 tcg_temp_free_i32(t_desc);
c4e7c493
RH
5335}
5336
c182c6db
RH
5337/* Indexed by [mte][be][dtype][nreg] */
5338static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
5339 { /* mte inactive, little-endian */
5340 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
5341 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
5342 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
5343 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
5344 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
5345
5346 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
5347 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
5348 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
5349 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
5350 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
5351
5352 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
5353 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
5354 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
5355 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
5356 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
5357
5358 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
5359 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
5360 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
5361 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
5362 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
5363
5364 /* mte inactive, big-endian */
5365 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
5366 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
5367 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
5368 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
5369 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
5370
5371 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
5372 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
5373 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
5374 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
5375 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
5376
5377 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
5378 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
5379 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
5380 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
5381 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
5382
5383 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
5384 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
5385 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
5386 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
5387 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
5388
5389 { /* mte active, little-endian */
5390 { { gen_helper_sve_ld1bb_r_mte,
5391 gen_helper_sve_ld2bb_r_mte,
5392 gen_helper_sve_ld3bb_r_mte,
5393 gen_helper_sve_ld4bb_r_mte },
5394 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
5395 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
5396 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
5397
5398 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
5399 { gen_helper_sve_ld1hh_le_r_mte,
5400 gen_helper_sve_ld2hh_le_r_mte,
5401 gen_helper_sve_ld3hh_le_r_mte,
5402 gen_helper_sve_ld4hh_le_r_mte },
5403 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
5404 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
5405
5406 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
5407 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
5408 { gen_helper_sve_ld1ss_le_r_mte,
5409 gen_helper_sve_ld2ss_le_r_mte,
5410 gen_helper_sve_ld3ss_le_r_mte,
5411 gen_helper_sve_ld4ss_le_r_mte },
5412 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
5413
5414 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
5415 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
5416 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
5417 { gen_helper_sve_ld1dd_le_r_mte,
5418 gen_helper_sve_ld2dd_le_r_mte,
5419 gen_helper_sve_ld3dd_le_r_mte,
5420 gen_helper_sve_ld4dd_le_r_mte } },
5421
5422 /* mte active, big-endian */
5423 { { gen_helper_sve_ld1bb_r_mte,
5424 gen_helper_sve_ld2bb_r_mte,
5425 gen_helper_sve_ld3bb_r_mte,
5426 gen_helper_sve_ld4bb_r_mte },
5427 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
5428 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
5429 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
5430
5431 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
5432 { gen_helper_sve_ld1hh_be_r_mte,
5433 gen_helper_sve_ld2hh_be_r_mte,
5434 gen_helper_sve_ld3hh_be_r_mte,
5435 gen_helper_sve_ld4hh_be_r_mte },
5436 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
5437 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
5438
5439 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
5440 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
5441 { gen_helper_sve_ld1ss_be_r_mte,
5442 gen_helper_sve_ld2ss_be_r_mte,
5443 gen_helper_sve_ld3ss_be_r_mte,
5444 gen_helper_sve_ld4ss_be_r_mte },
5445 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
5446
5447 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
5448 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
5449 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
5450 { gen_helper_sve_ld1dd_be_r_mte,
5451 gen_helper_sve_ld2dd_be_r_mte,
5452 gen_helper_sve_ld3dd_be_r_mte,
5453 gen_helper_sve_ld4dd_be_r_mte } } },
5454};
5455
c4e7c493
RH
5456static void do_ld_zpa(DisasContext *s, int zt, int pg,
5457 TCGv_i64 addr, int dtype, int nreg)
5458{
206adacf 5459 gen_helper_gvec_mem *fn
c182c6db 5460 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 5461
206adacf
RH
5462 /*
5463 * While there are holes in the table, they are not
c4e7c493
RH
5464 * accessible via the instruction encoding.
5465 */
5466 assert(fn != NULL);
206adacf 5467 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
5468}
5469
3a7be554 5470static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
5471{
5472 if (a->rm == 31) {
5473 return false;
5474 }
5475 if (sve_access_check(s)) {
5476 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5477 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
5478 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5479 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5480 }
5481 return true;
5482}
5483
3a7be554 5484static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
5485{
5486 if (sve_access_check(s)) {
5487 int vsz = vec_full_reg_size(s);
5488 int elements = vsz >> dtype_esz[a->dtype];
5489 TCGv_i64 addr = new_tmp_a64(s);
5490
5491 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5492 (a->imm * elements * (a->nreg + 1))
5493 << dtype_msz(a->dtype));
5494 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5495 }
5496 return true;
5497}
e2654d75 5498
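/*
 * The first-fault (LDFF1) and non-fault (LDNF1) translators below reuse
 * do_mem_zpa with single-register helpers; the difference from a normal
 * LD1 lives entirely in the helpers, which are expected to take a real
 * fault only for the first active element (or not at all, for LDNF1) and
 * otherwise to suppress the fault and clear FFR from the faulting element
 * onward.
 */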
3a7be554 5499static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 5500{
aa13f7c3
RH
5501 static gen_helper_gvec_mem * const fns[2][2][16] = {
5502 { /* mte inactive, little-endian */
5503 { gen_helper_sve_ldff1bb_r,
5504 gen_helper_sve_ldff1bhu_r,
5505 gen_helper_sve_ldff1bsu_r,
5506 gen_helper_sve_ldff1bdu_r,
5507
5508 gen_helper_sve_ldff1sds_le_r,
5509 gen_helper_sve_ldff1hh_le_r,
5510 gen_helper_sve_ldff1hsu_le_r,
5511 gen_helper_sve_ldff1hdu_le_r,
5512
5513 gen_helper_sve_ldff1hds_le_r,
5514 gen_helper_sve_ldff1hss_le_r,
5515 gen_helper_sve_ldff1ss_le_r,
5516 gen_helper_sve_ldff1sdu_le_r,
5517
5518 gen_helper_sve_ldff1bds_r,
5519 gen_helper_sve_ldff1bss_r,
5520 gen_helper_sve_ldff1bhs_r,
5521 gen_helper_sve_ldff1dd_le_r },
5522
5523 /* mte inactive, big-endian */
5524 { gen_helper_sve_ldff1bb_r,
5525 gen_helper_sve_ldff1bhu_r,
5526 gen_helper_sve_ldff1bsu_r,
5527 gen_helper_sve_ldff1bdu_r,
5528
5529 gen_helper_sve_ldff1sds_be_r,
5530 gen_helper_sve_ldff1hh_be_r,
5531 gen_helper_sve_ldff1hsu_be_r,
5532 gen_helper_sve_ldff1hdu_be_r,
5533
5534 gen_helper_sve_ldff1hds_be_r,
5535 gen_helper_sve_ldff1hss_be_r,
5536 gen_helper_sve_ldff1ss_be_r,
5537 gen_helper_sve_ldff1sdu_be_r,
5538
5539 gen_helper_sve_ldff1bds_r,
5540 gen_helper_sve_ldff1bss_r,
5541 gen_helper_sve_ldff1bhs_r,
5542 gen_helper_sve_ldff1dd_be_r } },
5543
5544 { /* mte active, little-endian */
5545 { gen_helper_sve_ldff1bb_r_mte,
5546 gen_helper_sve_ldff1bhu_r_mte,
5547 gen_helper_sve_ldff1bsu_r_mte,
5548 gen_helper_sve_ldff1bdu_r_mte,
5549
5550 gen_helper_sve_ldff1sds_le_r_mte,
5551 gen_helper_sve_ldff1hh_le_r_mte,
5552 gen_helper_sve_ldff1hsu_le_r_mte,
5553 gen_helper_sve_ldff1hdu_le_r_mte,
5554
5555 gen_helper_sve_ldff1hds_le_r_mte,
5556 gen_helper_sve_ldff1hss_le_r_mte,
5557 gen_helper_sve_ldff1ss_le_r_mte,
5558 gen_helper_sve_ldff1sdu_le_r_mte,
5559
5560 gen_helper_sve_ldff1bds_r_mte,
5561 gen_helper_sve_ldff1bss_r_mte,
5562 gen_helper_sve_ldff1bhs_r_mte,
5563 gen_helper_sve_ldff1dd_le_r_mte },
5564
5565 /* mte active, big-endian */
5566 { gen_helper_sve_ldff1bb_r_mte,
5567 gen_helper_sve_ldff1bhu_r_mte,
5568 gen_helper_sve_ldff1bsu_r_mte,
5569 gen_helper_sve_ldff1bdu_r_mte,
5570
5571 gen_helper_sve_ldff1sds_be_r_mte,
5572 gen_helper_sve_ldff1hh_be_r_mte,
5573 gen_helper_sve_ldff1hsu_be_r_mte,
5574 gen_helper_sve_ldff1hdu_be_r_mte,
5575
5576 gen_helper_sve_ldff1hds_be_r_mte,
5577 gen_helper_sve_ldff1hss_be_r_mte,
5578 gen_helper_sve_ldff1ss_be_r_mte,
5579 gen_helper_sve_ldff1sdu_be_r_mte,
5580
5581 gen_helper_sve_ldff1bds_r_mte,
5582 gen_helper_sve_ldff1bss_r_mte,
5583 gen_helper_sve_ldff1bhs_r_mte,
5584 gen_helper_sve_ldff1dd_be_r_mte } },
e2654d75
RH
5585 };
5586
5587 if (sve_access_check(s)) {
5588 TCGv_i64 addr = new_tmp_a64(s);
5589 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5590 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
aa13f7c3
RH
5591 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5592 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
5593 }
5594 return true;
5595}
5596
3a7be554 5597static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 5598{
aa13f7c3
RH
5599 static gen_helper_gvec_mem * const fns[2][2][16] = {
5600 { /* mte inactive, little-endian */
5601 { gen_helper_sve_ldnf1bb_r,
5602 gen_helper_sve_ldnf1bhu_r,
5603 gen_helper_sve_ldnf1bsu_r,
5604 gen_helper_sve_ldnf1bdu_r,
5605
5606 gen_helper_sve_ldnf1sds_le_r,
5607 gen_helper_sve_ldnf1hh_le_r,
5608 gen_helper_sve_ldnf1hsu_le_r,
5609 gen_helper_sve_ldnf1hdu_le_r,
5610
5611 gen_helper_sve_ldnf1hds_le_r,
5612 gen_helper_sve_ldnf1hss_le_r,
5613 gen_helper_sve_ldnf1ss_le_r,
5614 gen_helper_sve_ldnf1sdu_le_r,
5615
5616 gen_helper_sve_ldnf1bds_r,
5617 gen_helper_sve_ldnf1bss_r,
5618 gen_helper_sve_ldnf1bhs_r,
5619 gen_helper_sve_ldnf1dd_le_r },
5620
5621 /* mte inactive, big-endian */
5622 { gen_helper_sve_ldnf1bb_r,
5623 gen_helper_sve_ldnf1bhu_r,
5624 gen_helper_sve_ldnf1bsu_r,
5625 gen_helper_sve_ldnf1bdu_r,
5626
5627 gen_helper_sve_ldnf1sds_be_r,
5628 gen_helper_sve_ldnf1hh_be_r,
5629 gen_helper_sve_ldnf1hsu_be_r,
5630 gen_helper_sve_ldnf1hdu_be_r,
5631
5632 gen_helper_sve_ldnf1hds_be_r,
5633 gen_helper_sve_ldnf1hss_be_r,
5634 gen_helper_sve_ldnf1ss_be_r,
5635 gen_helper_sve_ldnf1sdu_be_r,
5636
5637 gen_helper_sve_ldnf1bds_r,
5638 gen_helper_sve_ldnf1bss_r,
5639 gen_helper_sve_ldnf1bhs_r,
5640 gen_helper_sve_ldnf1dd_be_r } },
5641
5642 { /* mte active, little-endian */
5643 { gen_helper_sve_ldnf1bb_r_mte,
5644 gen_helper_sve_ldnf1bhu_r_mte,
5645 gen_helper_sve_ldnf1bsu_r_mte,
5646 gen_helper_sve_ldnf1bdu_r_mte,
5647
5648 gen_helper_sve_ldnf1sds_le_r_mte,
5649 gen_helper_sve_ldnf1hh_le_r_mte,
5650 gen_helper_sve_ldnf1hsu_le_r_mte,
5651 gen_helper_sve_ldnf1hdu_le_r_mte,
5652
5653 gen_helper_sve_ldnf1hds_le_r_mte,
5654 gen_helper_sve_ldnf1hss_le_r_mte,
5655 gen_helper_sve_ldnf1ss_le_r_mte,
5656 gen_helper_sve_ldnf1sdu_le_r_mte,
5657
5658 gen_helper_sve_ldnf1bds_r_mte,
5659 gen_helper_sve_ldnf1bss_r_mte,
5660 gen_helper_sve_ldnf1bhs_r_mte,
5661 gen_helper_sve_ldnf1dd_le_r_mte },
5662
5663 /* mte active, big-endian */
5664 { gen_helper_sve_ldnf1bb_r_mte,
5665 gen_helper_sve_ldnf1bhu_r_mte,
5666 gen_helper_sve_ldnf1bsu_r_mte,
5667 gen_helper_sve_ldnf1bdu_r_mte,
5668
5669 gen_helper_sve_ldnf1sds_be_r_mte,
5670 gen_helper_sve_ldnf1hh_be_r_mte,
5671 gen_helper_sve_ldnf1hsu_be_r_mte,
5672 gen_helper_sve_ldnf1hdu_be_r_mte,
5673
5674 gen_helper_sve_ldnf1hds_be_r_mte,
5675 gen_helper_sve_ldnf1hss_be_r_mte,
5676 gen_helper_sve_ldnf1ss_be_r_mte,
5677 gen_helper_sve_ldnf1sdu_be_r_mte,
5678
5679 gen_helper_sve_ldnf1bds_r_mte,
5680 gen_helper_sve_ldnf1bss_r_mte,
5681 gen_helper_sve_ldnf1bhs_r_mte,
5682 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
5683 };
5684
5685 if (sve_access_check(s)) {
5686 int vsz = vec_full_reg_size(s);
5687 int elements = vsz >> dtype_esz[a->dtype];
5688 int off = (a->imm * elements) << dtype_msz(a->dtype);
5689 TCGv_i64 addr = new_tmp_a64(s);
5690
5691 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
5692 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5693 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
5694 }
5695 return true;
5696}
1a039c7e 5697
c182c6db 5698static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
05abe304 5699{
05abe304
RH
5700 unsigned vsz = vec_full_reg_size(s);
5701 TCGv_ptr t_pg;
7924d239 5702 int poff;
05abe304
RH
5703
5704 /* Load the first quadword using the normal predicated load helpers. */
2a99ab2b
RH
5705 poff = pred_full_reg_offset(s, pg);
5706 if (vsz > 16) {
5707 /*
5708 * Zero-extend the first 16 bits of the predicate into a temporary.
5709 * This avoids triggering an assert making sure we don't have bits
5710 * set within a predicate beyond VQ, but we have lowered VQ to 1
5711 * for this load operation.
5712 */
5713 TCGv_i64 tmp = tcg_temp_new_i64();
5714#ifdef HOST_WORDS_BIGENDIAN
5715 poff += 6;
5716#endif
5717 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5718
5719 poff = offsetof(CPUARMState, vfp.preg_tmp);
5720 tcg_gen_st_i64(tmp, cpu_env, poff);
5721 tcg_temp_free_i64(tmp);
5722 }
5723
05abe304 5724 t_pg = tcg_temp_new_ptr();
2a99ab2b 5725 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 5726
c182c6db
RH
5727 gen_helper_gvec_mem *fn
5728 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
7924d239 5729 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
05abe304
RH
5730
5731 tcg_temp_free_ptr(t_pg);
05abe304
RH
5732
5733 /* Replicate that first quadword. */
5734 if (vsz > 16) {
7924d239
RH
5735 int doff = vec_full_reg_offset(s, zt);
5736 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
05abe304
RH
5737 }
5738}
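
/*
 * Illustrative sketch, not part of the original file: after the 16-byte
 * predicated load above (which only ever consumes the low 16 predicate
 * bits, hence the preg_tmp trick), the quadword is copied into every
 * remaining 16-byte chunk of the destination.  A byte-wise equivalent,
 * using a hypothetical sketch_* name:
 */
static void sketch_replicate_quadword(uint8_t *zreg, unsigned vsz)
{
    unsigned i;

    for (i = 16; i < vsz; ++i) {
        zreg[i] = zreg[i % 16];
    }
}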
5739
3a7be554 5740static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5741{
5742 if (a->rm == 31) {
5743 return false;
5744 }
5745 if (sve_access_check(s)) {
5746 int msz = dtype_msz(a->dtype);
5747 TCGv_i64 addr = new_tmp_a64(s);
5748 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5749 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5750 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5751 }
5752 return true;
5753}
5754
3a7be554 5755static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5756{
5757 if (sve_access_check(s)) {
5758 TCGv_i64 addr = new_tmp_a64(s);
5759 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5760 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5761 }
5762 return true;
5763}
5764
12c563f6
RH
5765static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
5766{
5767 unsigned vsz = vec_full_reg_size(s);
5768 unsigned vsz_r32;
5769 TCGv_ptr t_pg;
5770 int poff, doff;
5771
5772 if (vsz < 32) {
5773 /*
5774 * Note that this UNDEFINED check comes after CheckSVEEnabled()
5775 * in the ARM pseudocode, which is the sve_access_check() done
5776 * in our caller. We should not now return false from the caller.
5777 */
5778 unallocated_encoding(s);
5779 return;
5780 }
5781
5782 /* Load the first octaword using the normal predicated load helpers. */
5783
5784 poff = pred_full_reg_offset(s, pg);
5785 if (vsz > 32) {
5786 /*
5787 * Zero-extend the first 32 bits of the predicate into a temporary.
5788 * This avoids triggering an assert making sure we don't have bits
5789 * set within a predicate beyond VQ, but we have lowered VQ to 2
5790 * for this load operation.
5791 */
5792 TCGv_i64 tmp = tcg_temp_new_i64();
5793#ifdef HOST_WORDS_BIGENDIAN
5794 poff += 4;
5795#endif
5796 tcg_gen_ld32u_i64(tmp, cpu_env, poff);
5797
5798 poff = offsetof(CPUARMState, vfp.preg_tmp);
5799 tcg_gen_st_i64(tmp, cpu_env, poff);
5800 tcg_temp_free_i64(tmp);
5801 }
5802
5803 t_pg = tcg_temp_new_ptr();
5804 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
5805
5806 gen_helper_gvec_mem *fn
5807 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
5808 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
5809
5810 tcg_temp_free_ptr(t_pg);
5811
5812 /*
5813 * Replicate that first octaword.
5814 * The replication happens in units of 32; if the full vector size
5815 * is not a multiple of 32, the final bits are zeroed.
5816 */
5817 doff = vec_full_reg_offset(s, zt);
5818 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
5819 if (vsz >= 64) {
5820 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
5821 }
5822 vsz -= vsz_r32;
5823 if (vsz) {
5824 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
5825 }
5826}
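
/*
 * Illustrative sketch, not part of the original file: the replication and
 * tail handling described in the comment above, byte-wise.  Assumes
 * vsz >= 32, which do_ldro() has already checked; the sketch_* name is
 * hypothetical.
 */
static void sketch_replicate_octaword(uint8_t *zreg, unsigned vsz)
{
    unsigned vsz_r32 = vsz & ~31u;      /* QEMU_ALIGN_DOWN(vsz, 32) */
    unsigned i;

    for (i = 32; i < vsz_r32; ++i) {
        zreg[i] = zreg[i % 32];         /* replicate the first octaword */
    }
    for (i = vsz_r32; i < vsz; ++i) {
        zreg[i] = 0;                    /* zero the ragged tail */
    }
}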
5827
5828static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5829{
5830 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5831 return false;
5832 }
5833 if (a->rm == 31) {
5834 return false;
5835 }
5836 if (sve_access_check(s)) {
5837 TCGv_i64 addr = new_tmp_a64(s);
5838 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5839 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5840 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5841 }
5842 return true;
5843}
5844
5845static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5846{
5847 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5848 return false;
5849 }
5850 if (sve_access_check(s)) {
5851 TCGv_i64 addr = new_tmp_a64(s);
5852 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5853 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5854 }
5855 return true;
5856}
5857
68459864 5858/* Load and broadcast element. */
3a7be554 5859static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 5860{
68459864
RH
5861 unsigned vsz = vec_full_reg_size(s);
5862 unsigned psz = pred_full_reg_size(s);
5863 unsigned esz = dtype_esz[a->dtype];
d0e372b0 5864 unsigned msz = dtype_msz(a->dtype);
c0ed9166 5865 TCGLabel *over;
4ac430e1 5866 TCGv_i64 temp, clean_addr;
68459864 5867
c0ed9166
RH
5868 if (!sve_access_check(s)) {
5869 return true;
5870 }
5871
5872 over = gen_new_label();
5873
68459864
RH
5874 /* If the guarding predicate has no bits set, no load occurs. */
5875 if (psz <= 8) {
5876 /* Reduce the pred_esz_masks value simply to reduce the
5877 * size of the code generated here.
5878 */
5879 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5880 temp = tcg_temp_new_i64();
5881 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5882 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5883 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5884 tcg_temp_free_i64(temp);
5885 } else {
5886 TCGv_i32 t32 = tcg_temp_new_i32();
5887 find_last_active(s, t32, esz, a->pg);
5888 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5889 tcg_temp_free_i32(t32);
5890 }
5891
5892 /* Load the data. */
5893 temp = tcg_temp_new_i64();
d0e372b0 5894 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5895 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5896
5897 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
0ca0f872 5898 finalize_memop(s, dtype_mop[a->dtype]));
68459864
RH
5899
5900 /* Broadcast to *all* elements. */
5901 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5902 vsz, vsz, temp);
5903 tcg_temp_free_i64(temp);
5904
5905 /* Zero the inactive elements. */
5906 gen_set_label(over);
60245996 5907 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5908}
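
/*
 * Illustrative sketch, not part of the original file: element-wise view of
 * the LD1R sequence above.  "value" stands for the single element loaded
 * from memory; when no predicate element is active the translator skips
 * the load entirely via the branch to "over".  The sketch_* name is
 * hypothetical.
 */
static void sketch_ld1r(uint64_t *zd, const bool *pg,
                        unsigned elements, uint64_t value)
{
    unsigned i;

    for (i = 0; i < elements; ++i) {
        /* Broadcast to active elements; inactive elements become zero. */
        zd[i] = pg[i] ? value : 0;
    }
}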
5909
1a039c7e
RH
5910static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5911 int msz, int esz, int nreg)
5912{
71b9f394
RH
5913 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5914 { { { gen_helper_sve_st1bb_r,
5915 gen_helper_sve_st1bh_r,
5916 gen_helper_sve_st1bs_r,
5917 gen_helper_sve_st1bd_r },
5918 { NULL,
5919 gen_helper_sve_st1hh_le_r,
5920 gen_helper_sve_st1hs_le_r,
5921 gen_helper_sve_st1hd_le_r },
5922 { NULL, NULL,
5923 gen_helper_sve_st1ss_le_r,
5924 gen_helper_sve_st1sd_le_r },
5925 { NULL, NULL, NULL,
5926 gen_helper_sve_st1dd_le_r } },
5927 { { gen_helper_sve_st1bb_r,
5928 gen_helper_sve_st1bh_r,
5929 gen_helper_sve_st1bs_r,
5930 gen_helper_sve_st1bd_r },
5931 { NULL,
5932 gen_helper_sve_st1hh_be_r,
5933 gen_helper_sve_st1hs_be_r,
5934 gen_helper_sve_st1hd_be_r },
5935 { NULL, NULL,
5936 gen_helper_sve_st1ss_be_r,
5937 gen_helper_sve_st1sd_be_r },
5938 { NULL, NULL, NULL,
5939 gen_helper_sve_st1dd_be_r } } },
5940
5941 { { { gen_helper_sve_st1bb_r_mte,
5942 gen_helper_sve_st1bh_r_mte,
5943 gen_helper_sve_st1bs_r_mte,
5944 gen_helper_sve_st1bd_r_mte },
5945 { NULL,
5946 gen_helper_sve_st1hh_le_r_mte,
5947 gen_helper_sve_st1hs_le_r_mte,
5948 gen_helper_sve_st1hd_le_r_mte },
5949 { NULL, NULL,
5950 gen_helper_sve_st1ss_le_r_mte,
5951 gen_helper_sve_st1sd_le_r_mte },
5952 { NULL, NULL, NULL,
5953 gen_helper_sve_st1dd_le_r_mte } },
5954 { { gen_helper_sve_st1bb_r_mte,
5955 gen_helper_sve_st1bh_r_mte,
5956 gen_helper_sve_st1bs_r_mte,
5957 gen_helper_sve_st1bd_r_mte },
5958 { NULL,
5959 gen_helper_sve_st1hh_be_r_mte,
5960 gen_helper_sve_st1hs_be_r_mte,
5961 gen_helper_sve_st1hd_be_r_mte },
5962 { NULL, NULL,
5963 gen_helper_sve_st1ss_be_r_mte,
5964 gen_helper_sve_st1sd_be_r_mte },
5965 { NULL, NULL, NULL,
5966 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5967 };
71b9f394
RH
5968 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5969 { { { gen_helper_sve_st2bb_r,
5970 gen_helper_sve_st2hh_le_r,
5971 gen_helper_sve_st2ss_le_r,
5972 gen_helper_sve_st2dd_le_r },
5973 { gen_helper_sve_st3bb_r,
5974 gen_helper_sve_st3hh_le_r,
5975 gen_helper_sve_st3ss_le_r,
5976 gen_helper_sve_st3dd_le_r },
5977 { gen_helper_sve_st4bb_r,
5978 gen_helper_sve_st4hh_le_r,
5979 gen_helper_sve_st4ss_le_r,
5980 gen_helper_sve_st4dd_le_r } },
5981 { { gen_helper_sve_st2bb_r,
5982 gen_helper_sve_st2hh_be_r,
5983 gen_helper_sve_st2ss_be_r,
5984 gen_helper_sve_st2dd_be_r },
5985 { gen_helper_sve_st3bb_r,
5986 gen_helper_sve_st3hh_be_r,
5987 gen_helper_sve_st3ss_be_r,
5988 gen_helper_sve_st3dd_be_r },
5989 { gen_helper_sve_st4bb_r,
5990 gen_helper_sve_st4hh_be_r,
5991 gen_helper_sve_st4ss_be_r,
5992 gen_helper_sve_st4dd_be_r } } },
5993 { { { gen_helper_sve_st2bb_r_mte,
5994 gen_helper_sve_st2hh_le_r_mte,
5995 gen_helper_sve_st2ss_le_r_mte,
5996 gen_helper_sve_st2dd_le_r_mte },
5997 { gen_helper_sve_st3bb_r_mte,
5998 gen_helper_sve_st3hh_le_r_mte,
5999 gen_helper_sve_st3ss_le_r_mte,
6000 gen_helper_sve_st3dd_le_r_mte },
6001 { gen_helper_sve_st4bb_r_mte,
6002 gen_helper_sve_st4hh_le_r_mte,
6003 gen_helper_sve_st4ss_le_r_mte,
6004 gen_helper_sve_st4dd_le_r_mte } },
6005 { { gen_helper_sve_st2bb_r_mte,
6006 gen_helper_sve_st2hh_be_r_mte,
6007 gen_helper_sve_st2ss_be_r_mte,
6008 gen_helper_sve_st2dd_be_r_mte },
6009 { gen_helper_sve_st3bb_r_mte,
6010 gen_helper_sve_st3hh_be_r_mte,
6011 gen_helper_sve_st3ss_be_r_mte,
6012 gen_helper_sve_st3dd_be_r_mte },
6013 { gen_helper_sve_st4bb_r_mte,
6014 gen_helper_sve_st4hh_be_r_mte,
6015 gen_helper_sve_st4ss_be_r_mte,
6016 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
6017 };
6018 gen_helper_gvec_mem *fn;
28d57f2d 6019 int be = s->be_data == MO_BE;
1a039c7e
RH
6020
6021 if (nreg == 0) {
6022 /* ST1 */
71b9f394
RH
6023 fn = fn_single[s->mte_active[0]][be][msz][esz];
6024 nreg = 1;
1a039c7e
RH
6025 } else {
6026 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
6027 assert(msz == esz);
71b9f394 6028 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
6029 }
6030 assert(fn != NULL);
71b9f394 6031 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
6032}
6033
3a7be554 6034static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
6035{
6036 if (a->rm == 31 || a->msz > a->esz) {
6037 return false;
6038 }
6039 if (sve_access_check(s)) {
6040 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 6041 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
6042 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
6043 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6044 }
6045 return true;
6046}
6047
3a7be554 6048static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
6049{
6050 if (a->msz > a->esz) {
6051 return false;
6052 }
6053 if (sve_access_check(s)) {
6054 int vsz = vec_full_reg_size(s);
6055 int elements = vsz >> a->esz;
6056 TCGv_i64 addr = new_tmp_a64(s);
6057
6058 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
6059 (a->imm * elements * (a->nreg + 1)) << a->msz);
6060 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6061 }
6062 return true;
6063}
f6dbf62a
RH
6064
6065/*
6066 *** SVE gather loads / scatter stores
6067 */
6068
500d0484 6069static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 6070 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 6071 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
6072{
6073 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
6074 TCGv_ptr t_zm = tcg_temp_new_ptr();
6075 TCGv_ptr t_pg = tcg_temp_new_ptr();
6076 TCGv_ptr t_zt = tcg_temp_new_ptr();
500d0484 6077 TCGv_i32 t_desc;
d28d12f0 6078 int desc = 0;
500d0484 6079
d28d12f0
RH
6080 if (s->mte_active[0]) {
6081 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
6082 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
6083 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
6084 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 6085 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
6086 desc <<= SVE_MTEDESC_SHIFT;
6087 }
cdecb3fc 6088 desc = simd_desc(vsz, vsz, desc | scale);
500d0484 6089 t_desc = tcg_const_i32(desc);
f6dbf62a
RH
6090
6091 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
6092 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
6093 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
500d0484 6094 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
f6dbf62a
RH
6095
6096 tcg_temp_free_ptr(t_zt);
6097 tcg_temp_free_ptr(t_zm);
6098 tcg_temp_free_ptr(t_pg);
500d0484 6099 tcg_temp_free_i32(t_desc);
f6dbf62a
RH
6100}
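
/*
 * Illustrative sketch, not part of the original file: each FIELD_DP32()
 * call above deposits one value into a named bitfield of the descriptor,
 * before the result is shifted by SVE_MTEDESC_SHIFT and folded into the
 * simd_desc() data together with the scale.  A plain-C deposit with
 * explicit shift/length parameters, using a hypothetical sketch_* name:
 */
static inline uint32_t sketch_deposit32(uint32_t desc, unsigned shift,
                                        unsigned len, uint32_t val)
{
    /* Assumes 0 < len < 32, as for the small MTE descriptor fields. */
    uint32_t mask = ((1u << len) - 1) << shift;

    return (desc & ~mask) | ((val << shift) & mask);
}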
6101
d28d12f0
RH
6102/* Indexed by [mte][be][ff][xs][u][msz]. */
6103static gen_helper_gvec_mem_scatter * const
6104gather_load_fn32[2][2][2][2][2][3] = {
6105 { /* MTE Inactive */
6106 { /* Little-endian */
6107 { { { gen_helper_sve_ldbss_zsu,
6108 gen_helper_sve_ldhss_le_zsu,
6109 NULL, },
6110 { gen_helper_sve_ldbsu_zsu,
6111 gen_helper_sve_ldhsu_le_zsu,
6112 gen_helper_sve_ldss_le_zsu, } },
6113 { { gen_helper_sve_ldbss_zss,
6114 gen_helper_sve_ldhss_le_zss,
6115 NULL, },
6116 { gen_helper_sve_ldbsu_zss,
6117 gen_helper_sve_ldhsu_le_zss,
6118 gen_helper_sve_ldss_le_zss, } } },
6119
6120 /* First-fault */
6121 { { { gen_helper_sve_ldffbss_zsu,
6122 gen_helper_sve_ldffhss_le_zsu,
6123 NULL, },
6124 { gen_helper_sve_ldffbsu_zsu,
6125 gen_helper_sve_ldffhsu_le_zsu,
6126 gen_helper_sve_ldffss_le_zsu, } },
6127 { { gen_helper_sve_ldffbss_zss,
6128 gen_helper_sve_ldffhss_le_zss,
6129 NULL, },
6130 { gen_helper_sve_ldffbsu_zss,
6131 gen_helper_sve_ldffhsu_le_zss,
6132 gen_helper_sve_ldffss_le_zss, } } } },
6133
6134 { /* Big-endian */
6135 { { { gen_helper_sve_ldbss_zsu,
6136 gen_helper_sve_ldhss_be_zsu,
6137 NULL, },
6138 { gen_helper_sve_ldbsu_zsu,
6139 gen_helper_sve_ldhsu_be_zsu,
6140 gen_helper_sve_ldss_be_zsu, } },
6141 { { gen_helper_sve_ldbss_zss,
6142 gen_helper_sve_ldhss_be_zss,
6143 NULL, },
6144 { gen_helper_sve_ldbsu_zss,
6145 gen_helper_sve_ldhsu_be_zss,
6146 gen_helper_sve_ldss_be_zss, } } },
6147
6148 /* First-fault */
6149 { { { gen_helper_sve_ldffbss_zsu,
6150 gen_helper_sve_ldffhss_be_zsu,
6151 NULL, },
6152 { gen_helper_sve_ldffbsu_zsu,
6153 gen_helper_sve_ldffhsu_be_zsu,
6154 gen_helper_sve_ldffss_be_zsu, } },
6155 { { gen_helper_sve_ldffbss_zss,
6156 gen_helper_sve_ldffhss_be_zss,
6157 NULL, },
6158 { gen_helper_sve_ldffbsu_zss,
6159 gen_helper_sve_ldffhsu_be_zss,
6160 gen_helper_sve_ldffss_be_zss, } } } } },
6161 { /* MTE Active */
6162 { /* Little-endian */
6163 { { { gen_helper_sve_ldbss_zsu_mte,
6164 gen_helper_sve_ldhss_le_zsu_mte,
6165 NULL, },
6166 { gen_helper_sve_ldbsu_zsu_mte,
6167 gen_helper_sve_ldhsu_le_zsu_mte,
6168 gen_helper_sve_ldss_le_zsu_mte, } },
6169 { { gen_helper_sve_ldbss_zss_mte,
6170 gen_helper_sve_ldhss_le_zss_mte,
6171 NULL, },
6172 { gen_helper_sve_ldbsu_zss_mte,
6173 gen_helper_sve_ldhsu_le_zss_mte,
6174 gen_helper_sve_ldss_le_zss_mte, } } },
6175
6176 /* First-fault */
6177 { { { gen_helper_sve_ldffbss_zsu_mte,
6178 gen_helper_sve_ldffhss_le_zsu_mte,
6179 NULL, },
6180 { gen_helper_sve_ldffbsu_zsu_mte,
6181 gen_helper_sve_ldffhsu_le_zsu_mte,
6182 gen_helper_sve_ldffss_le_zsu_mte, } },
6183 { { gen_helper_sve_ldffbss_zss_mte,
6184 gen_helper_sve_ldffhss_le_zss_mte,
6185 NULL, },
6186 { gen_helper_sve_ldffbsu_zss_mte,
6187 gen_helper_sve_ldffhsu_le_zss_mte,
6188 gen_helper_sve_ldffss_le_zss_mte, } } } },
6189
6190 { /* Big-endian */
6191 { { { gen_helper_sve_ldbss_zsu_mte,
6192 gen_helper_sve_ldhss_be_zsu_mte,
6193 NULL, },
6194 { gen_helper_sve_ldbsu_zsu_mte,
6195 gen_helper_sve_ldhsu_be_zsu_mte,
6196 gen_helper_sve_ldss_be_zsu_mte, } },
6197 { { gen_helper_sve_ldbss_zss_mte,
6198 gen_helper_sve_ldhss_be_zss_mte,
6199 NULL, },
6200 { gen_helper_sve_ldbsu_zss_mte,
6201 gen_helper_sve_ldhsu_be_zss_mte,
6202 gen_helper_sve_ldss_be_zss_mte, } } },
6203
6204 /* First-fault */
6205 { { { gen_helper_sve_ldffbss_zsu_mte,
6206 gen_helper_sve_ldffhss_be_zsu_mte,
6207 NULL, },
6208 { gen_helper_sve_ldffbsu_zsu_mte,
6209 gen_helper_sve_ldffhsu_be_zsu_mte,
6210 gen_helper_sve_ldffss_be_zsu_mte, } },
6211 { { gen_helper_sve_ldffbss_zss_mte,
6212 gen_helper_sve_ldffhss_be_zss_mte,
6213 NULL, },
6214 { gen_helper_sve_ldffbsu_zss_mte,
6215 gen_helper_sve_ldffhsu_be_zss_mte,
6216 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
6217};
6218
6219/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
6220static gen_helper_gvec_mem_scatter * const
6221gather_load_fn64[2][2][2][3][2][4] = {
6222 { /* MTE Inactive */
6223 { /* Little-endian */
6224 { { { gen_helper_sve_ldbds_zsu,
6225 gen_helper_sve_ldhds_le_zsu,
6226 gen_helper_sve_ldsds_le_zsu,
6227 NULL, },
6228 { gen_helper_sve_ldbdu_zsu,
6229 gen_helper_sve_ldhdu_le_zsu,
6230 gen_helper_sve_ldsdu_le_zsu,
6231 gen_helper_sve_lddd_le_zsu, } },
6232 { { gen_helper_sve_ldbds_zss,
6233 gen_helper_sve_ldhds_le_zss,
6234 gen_helper_sve_ldsds_le_zss,
6235 NULL, },
6236 { gen_helper_sve_ldbdu_zss,
6237 gen_helper_sve_ldhdu_le_zss,
6238 gen_helper_sve_ldsdu_le_zss,
6239 gen_helper_sve_lddd_le_zss, } },
6240 { { gen_helper_sve_ldbds_zd,
6241 gen_helper_sve_ldhds_le_zd,
6242 gen_helper_sve_ldsds_le_zd,
6243 NULL, },
6244 { gen_helper_sve_ldbdu_zd,
6245 gen_helper_sve_ldhdu_le_zd,
6246 gen_helper_sve_ldsdu_le_zd,
6247 gen_helper_sve_lddd_le_zd, } } },
6248
6249 /* First-fault */
6250 { { { gen_helper_sve_ldffbds_zsu,
6251 gen_helper_sve_ldffhds_le_zsu,
6252 gen_helper_sve_ldffsds_le_zsu,
6253 NULL, },
6254 { gen_helper_sve_ldffbdu_zsu,
6255 gen_helper_sve_ldffhdu_le_zsu,
6256 gen_helper_sve_ldffsdu_le_zsu,
6257 gen_helper_sve_ldffdd_le_zsu, } },
6258 { { gen_helper_sve_ldffbds_zss,
6259 gen_helper_sve_ldffhds_le_zss,
6260 gen_helper_sve_ldffsds_le_zss,
6261 NULL, },
6262 { gen_helper_sve_ldffbdu_zss,
6263 gen_helper_sve_ldffhdu_le_zss,
6264 gen_helper_sve_ldffsdu_le_zss,
6265 gen_helper_sve_ldffdd_le_zss, } },
6266 { { gen_helper_sve_ldffbds_zd,
6267 gen_helper_sve_ldffhds_le_zd,
6268 gen_helper_sve_ldffsds_le_zd,
6269 NULL, },
6270 { gen_helper_sve_ldffbdu_zd,
6271 gen_helper_sve_ldffhdu_le_zd,
6272 gen_helper_sve_ldffsdu_le_zd,
6273 gen_helper_sve_ldffdd_le_zd, } } } },
6274 { /* Big-endian */
6275 { { { gen_helper_sve_ldbds_zsu,
6276 gen_helper_sve_ldhds_be_zsu,
6277 gen_helper_sve_ldsds_be_zsu,
6278 NULL, },
6279 { gen_helper_sve_ldbdu_zsu,
6280 gen_helper_sve_ldhdu_be_zsu,
6281 gen_helper_sve_ldsdu_be_zsu,
6282 gen_helper_sve_lddd_be_zsu, } },
6283 { { gen_helper_sve_ldbds_zss,
6284 gen_helper_sve_ldhds_be_zss,
6285 gen_helper_sve_ldsds_be_zss,
6286 NULL, },
6287 { gen_helper_sve_ldbdu_zss,
6288 gen_helper_sve_ldhdu_be_zss,
6289 gen_helper_sve_ldsdu_be_zss,
6290 gen_helper_sve_lddd_be_zss, } },
6291 { { gen_helper_sve_ldbds_zd,
6292 gen_helper_sve_ldhds_be_zd,
6293 gen_helper_sve_ldsds_be_zd,
6294 NULL, },
6295 { gen_helper_sve_ldbdu_zd,
6296 gen_helper_sve_ldhdu_be_zd,
6297 gen_helper_sve_ldsdu_be_zd,
6298 gen_helper_sve_lddd_be_zd, } } },
6299
6300 /* First-fault */
6301 { { { gen_helper_sve_ldffbds_zsu,
6302 gen_helper_sve_ldffhds_be_zsu,
6303 gen_helper_sve_ldffsds_be_zsu,
6304 NULL, },
6305 { gen_helper_sve_ldffbdu_zsu,
6306 gen_helper_sve_ldffhdu_be_zsu,
6307 gen_helper_sve_ldffsdu_be_zsu,
6308 gen_helper_sve_ldffdd_be_zsu, } },
6309 { { gen_helper_sve_ldffbds_zss,
6310 gen_helper_sve_ldffhds_be_zss,
6311 gen_helper_sve_ldffsds_be_zss,
6312 NULL, },
6313 { gen_helper_sve_ldffbdu_zss,
6314 gen_helper_sve_ldffhdu_be_zss,
6315 gen_helper_sve_ldffsdu_be_zss,
6316 gen_helper_sve_ldffdd_be_zss, } },
6317 { { gen_helper_sve_ldffbds_zd,
6318 gen_helper_sve_ldffhds_be_zd,
6319 gen_helper_sve_ldffsds_be_zd,
6320 NULL, },
6321 { gen_helper_sve_ldffbdu_zd,
6322 gen_helper_sve_ldffhdu_be_zd,
6323 gen_helper_sve_ldffsdu_be_zd,
6324 gen_helper_sve_ldffdd_be_zd, } } } } },
6325 { /* MTE Active */
6326 { /* Little-endian */
6327 { { { gen_helper_sve_ldbds_zsu_mte,
6328 gen_helper_sve_ldhds_le_zsu_mte,
6329 gen_helper_sve_ldsds_le_zsu_mte,
6330 NULL, },
6331 { gen_helper_sve_ldbdu_zsu_mte,
6332 gen_helper_sve_ldhdu_le_zsu_mte,
6333 gen_helper_sve_ldsdu_le_zsu_mte,
6334 gen_helper_sve_lddd_le_zsu_mte, } },
6335 { { gen_helper_sve_ldbds_zss_mte,
6336 gen_helper_sve_ldhds_le_zss_mte,
6337 gen_helper_sve_ldsds_le_zss_mte,
6338 NULL, },
6339 { gen_helper_sve_ldbdu_zss_mte,
6340 gen_helper_sve_ldhdu_le_zss_mte,
6341 gen_helper_sve_ldsdu_le_zss_mte,
6342 gen_helper_sve_lddd_le_zss_mte, } },
6343 { { gen_helper_sve_ldbds_zd_mte,
6344 gen_helper_sve_ldhds_le_zd_mte,
6345 gen_helper_sve_ldsds_le_zd_mte,
6346 NULL, },
6347 { gen_helper_sve_ldbdu_zd_mte,
6348 gen_helper_sve_ldhdu_le_zd_mte,
6349 gen_helper_sve_ldsdu_le_zd_mte,
6350 gen_helper_sve_lddd_le_zd_mte, } } },
6351
6352 /* First-fault */
6353 { { { gen_helper_sve_ldffbds_zsu_mte,
6354 gen_helper_sve_ldffhds_le_zsu_mte,
6355 gen_helper_sve_ldffsds_le_zsu_mte,
6356 NULL, },
6357 { gen_helper_sve_ldffbdu_zsu_mte,
6358 gen_helper_sve_ldffhdu_le_zsu_mte,
6359 gen_helper_sve_ldffsdu_le_zsu_mte,
6360 gen_helper_sve_ldffdd_le_zsu_mte, } },
6361 { { gen_helper_sve_ldffbds_zss_mte,
6362 gen_helper_sve_ldffhds_le_zss_mte,
6363 gen_helper_sve_ldffsds_le_zss_mte,
6364 NULL, },
6365 { gen_helper_sve_ldffbdu_zss_mte,
6366 gen_helper_sve_ldffhdu_le_zss_mte,
6367 gen_helper_sve_ldffsdu_le_zss_mte,
6368 gen_helper_sve_ldffdd_le_zss_mte, } },
6369 { { gen_helper_sve_ldffbds_zd_mte,
6370 gen_helper_sve_ldffhds_le_zd_mte,
6371 gen_helper_sve_ldffsds_le_zd_mte,
6372 NULL, },
6373 { gen_helper_sve_ldffbdu_zd_mte,
6374 gen_helper_sve_ldffhdu_le_zd_mte,
6375 gen_helper_sve_ldffsdu_le_zd_mte,
6376 gen_helper_sve_ldffdd_le_zd_mte, } } } },
6377 { /* Big-endian */
6378 { { { gen_helper_sve_ldbds_zsu_mte,
6379 gen_helper_sve_ldhds_be_zsu_mte,
6380 gen_helper_sve_ldsds_be_zsu_mte,
6381 NULL, },
6382 { gen_helper_sve_ldbdu_zsu_mte,
6383 gen_helper_sve_ldhdu_be_zsu_mte,
6384 gen_helper_sve_ldsdu_be_zsu_mte,
6385 gen_helper_sve_lddd_be_zsu_mte, } },
6386 { { gen_helper_sve_ldbds_zss_mte,
6387 gen_helper_sve_ldhds_be_zss_mte,
6388 gen_helper_sve_ldsds_be_zss_mte,
6389 NULL, },
6390 { gen_helper_sve_ldbdu_zss_mte,
6391 gen_helper_sve_ldhdu_be_zss_mte,
6392 gen_helper_sve_ldsdu_be_zss_mte,
6393 gen_helper_sve_lddd_be_zss_mte, } },
6394 { { gen_helper_sve_ldbds_zd_mte,
6395 gen_helper_sve_ldhds_be_zd_mte,
6396 gen_helper_sve_ldsds_be_zd_mte,
6397 NULL, },
6398 { gen_helper_sve_ldbdu_zd_mte,
6399 gen_helper_sve_ldhdu_be_zd_mte,
6400 gen_helper_sve_ldsdu_be_zd_mte,
6401 gen_helper_sve_lddd_be_zd_mte, } } },
6402
6403 /* First-fault */
6404 { { { gen_helper_sve_ldffbds_zsu_mte,
6405 gen_helper_sve_ldffhds_be_zsu_mte,
6406 gen_helper_sve_ldffsds_be_zsu_mte,
6407 NULL, },
6408 { gen_helper_sve_ldffbdu_zsu_mte,
6409 gen_helper_sve_ldffhdu_be_zsu_mte,
6410 gen_helper_sve_ldffsdu_be_zsu_mte,
6411 gen_helper_sve_ldffdd_be_zsu_mte, } },
6412 { { gen_helper_sve_ldffbds_zss_mte,
6413 gen_helper_sve_ldffhds_be_zss_mte,
6414 gen_helper_sve_ldffsds_be_zss_mte,
6415 NULL, },
6416 { gen_helper_sve_ldffbdu_zss_mte,
6417 gen_helper_sve_ldffhdu_be_zss_mte,
6418 gen_helper_sve_ldffsdu_be_zss_mte,
6419 gen_helper_sve_ldffdd_be_zss_mte, } },
6420 { { gen_helper_sve_ldffbds_zd_mte,
6421 gen_helper_sve_ldffhds_be_zd_mte,
6422 gen_helper_sve_ldffsds_be_zd_mte,
6423 NULL, },
6424 { gen_helper_sve_ldffbdu_zd_mte,
6425 gen_helper_sve_ldffhdu_be_zd_mte,
6426 gen_helper_sve_ldffsdu_be_zd_mte,
6427 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
6428};
6429
3a7be554 6430static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
6431{
6432 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6433 bool be = s->be_data == MO_BE;
6434 bool mte = s->mte_active[0];
673e9fa6
RH
6435
6436 if (!sve_access_check(s)) {
6437 return true;
6438 }
6439
6440 switch (a->esz) {
6441 case MO_32:
d28d12f0 6442 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6443 break;
6444 case MO_64:
d28d12f0 6445 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6446 break;
6447 }
6448 assert(fn != NULL);
6449
6450 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6451 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
6452 return true;
6453}
6454
3a7be554 6455static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
6456{
6457 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6458 bool be = s->be_data == MO_BE;
6459 bool mte = s->mte_active[0];
673e9fa6
RH
6460 TCGv_i64 imm;
6461
6462 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
6463 return false;
6464 }
6465 if (!sve_access_check(s)) {
6466 return true;
6467 }
6468
6469 switch (a->esz) {
6470 case MO_32:
d28d12f0 6471 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
6472 break;
6473 case MO_64:
d28d12f0 6474 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
6475 break;
6476 }
6477 assert(fn != NULL);
6478
6479 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
6480 * by loading the immediate into the scalar parameter.
6481 */
6482 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 6483 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
673e9fa6
RH
6484 tcg_temp_free_i64(imm);
6485 return true;
6486}
6487
cf327449
SL
6488static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
6489{
6490 if (!dc_isar_feature(aa64_sve2, s)) {
6491 return false;
6492 }
6493 return trans_LD1_zprz(s, a);
6494}
6495
d28d12f0
RH
6496/* Indexed by [mte][be][xs][msz]. */
6497static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
6498 { /* MTE Inactive */
6499 { /* Little-endian */
6500 { gen_helper_sve_stbs_zsu,
6501 gen_helper_sve_sths_le_zsu,
6502 gen_helper_sve_stss_le_zsu, },
6503 { gen_helper_sve_stbs_zss,
6504 gen_helper_sve_sths_le_zss,
6505 gen_helper_sve_stss_le_zss, } },
6506 { /* Big-endian */
6507 { gen_helper_sve_stbs_zsu,
6508 gen_helper_sve_sths_be_zsu,
6509 gen_helper_sve_stss_be_zsu, },
6510 { gen_helper_sve_stbs_zss,
6511 gen_helper_sve_sths_be_zss,
6512 gen_helper_sve_stss_be_zss, } } },
6513 { /* MTE Active */
6514 { /* Little-endian */
6515 { gen_helper_sve_stbs_zsu_mte,
6516 gen_helper_sve_sths_le_zsu_mte,
6517 gen_helper_sve_stss_le_zsu_mte, },
6518 { gen_helper_sve_stbs_zss_mte,
6519 gen_helper_sve_sths_le_zss_mte,
6520 gen_helper_sve_stss_le_zss_mte, } },
6521 { /* Big-endian */
6522 { gen_helper_sve_stbs_zsu_mte,
6523 gen_helper_sve_sths_be_zsu_mte,
6524 gen_helper_sve_stss_be_zsu_mte, },
6525 { gen_helper_sve_stbs_zss_mte,
6526 gen_helper_sve_sths_be_zss_mte,
6527 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
6528};
6529
6530/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
6531static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
6532 { /* MTE Inactive */
6533 { /* Little-endian */
6534 { gen_helper_sve_stbd_zsu,
6535 gen_helper_sve_sthd_le_zsu,
6536 gen_helper_sve_stsd_le_zsu,
6537 gen_helper_sve_stdd_le_zsu, },
6538 { gen_helper_sve_stbd_zss,
6539 gen_helper_sve_sthd_le_zss,
6540 gen_helper_sve_stsd_le_zss,
6541 gen_helper_sve_stdd_le_zss, },
6542 { gen_helper_sve_stbd_zd,
6543 gen_helper_sve_sthd_le_zd,
6544 gen_helper_sve_stsd_le_zd,
6545 gen_helper_sve_stdd_le_zd, } },
6546 { /* Big-endian */
6547 { gen_helper_sve_stbd_zsu,
6548 gen_helper_sve_sthd_be_zsu,
6549 gen_helper_sve_stsd_be_zsu,
6550 gen_helper_sve_stdd_be_zsu, },
6551 { gen_helper_sve_stbd_zss,
6552 gen_helper_sve_sthd_be_zss,
6553 gen_helper_sve_stsd_be_zss,
6554 gen_helper_sve_stdd_be_zss, },
6555 { gen_helper_sve_stbd_zd,
6556 gen_helper_sve_sthd_be_zd,
6557 gen_helper_sve_stsd_be_zd,
6558 gen_helper_sve_stdd_be_zd, } } },
6559 { /* MTE Active */
6560 { /* Little-endian */
6561 { gen_helper_sve_stbd_zsu_mte,
6562 gen_helper_sve_sthd_le_zsu_mte,
6563 gen_helper_sve_stsd_le_zsu_mte,
6564 gen_helper_sve_stdd_le_zsu_mte, },
6565 { gen_helper_sve_stbd_zss_mte,
6566 gen_helper_sve_sthd_le_zss_mte,
6567 gen_helper_sve_stsd_le_zss_mte,
6568 gen_helper_sve_stdd_le_zss_mte, },
6569 { gen_helper_sve_stbd_zd_mte,
6570 gen_helper_sve_sthd_le_zd_mte,
6571 gen_helper_sve_stsd_le_zd_mte,
6572 gen_helper_sve_stdd_le_zd_mte, } },
6573 { /* Big-endian */
6574 { gen_helper_sve_stbd_zsu_mte,
6575 gen_helper_sve_sthd_be_zsu_mte,
6576 gen_helper_sve_stsd_be_zsu_mte,
6577 gen_helper_sve_stdd_be_zsu_mte, },
6578 { gen_helper_sve_stbd_zss_mte,
6579 gen_helper_sve_sthd_be_zss_mte,
6580 gen_helper_sve_stsd_be_zss_mte,
6581 gen_helper_sve_stdd_be_zss_mte, },
6582 { gen_helper_sve_stbd_zd_mte,
6583 gen_helper_sve_sthd_be_zd_mte,
6584 gen_helper_sve_stsd_be_zd_mte,
6585 gen_helper_sve_stdd_be_zd_mte, } } },
408ecde9
RH
6586};
6587
3a7be554 6588static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 6589{
f6dbf62a 6590 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
6591 bool be = s->be_data == MO_BE;
6592 bool mte = s->mte_active[0];
f6dbf62a
RH
6593
6594 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6595 return false;
6596 }
6597 if (!sve_access_check(s)) {
6598 return true;
6599 }
6600 switch (a->esz) {
6601 case MO_32:
d28d12f0 6602 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
6603 break;
6604 case MO_64:
d28d12f0 6605 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
6606 break;
6607 default:
6608 g_assert_not_reached();
6609 }
6610 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6611 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
6612 return true;
6613}
dec6cf6b 6614
3a7be554 6615static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
6616{
6617 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6618 bool be = s->be_data == MO_BE;
6619 bool mte = s->mte_active[0];
408ecde9
RH
6620 TCGv_i64 imm;
6621
6622 if (a->esz < a->msz) {
6623 return false;
6624 }
6625 if (!sve_access_check(s)) {
6626 return true;
6627 }
6628
6629 switch (a->esz) {
6630 case MO_32:
d28d12f0 6631 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
6632 break;
6633 case MO_64:
d28d12f0 6634 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
6635 break;
6636 }
6637 assert(fn != NULL);
6638
6639 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6640 * by loading the immediate into the scalar parameter.
6641 */
6642 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 6643 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
408ecde9
RH
6644 tcg_temp_free_i64(imm);
6645 return true;
6646}
6647
6ebca45f
SL
6648static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
6649{
6650 if (!dc_isar_feature(aa64_sve2, s)) {
6651 return false;
6652 }
6653 return trans_ST1_zprz(s, a);
6654}
6655
dec6cf6b
RH
6656/*
6657 * Prefetches
6658 */
6659
3a7be554 6660static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
6661{
6662 /* Prefetch is a nop within QEMU. */
2f95a3b0 6663 (void)sve_access_check(s);
dec6cf6b
RH
6664 return true;
6665}
6666
3a7be554 6667static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
6668{
6669 if (a->rm == 31) {
6670 return false;
6671 }
6672 /* Prefetch is a nop within QEMU. */
2f95a3b0 6673 (void)sve_access_check(s);
dec6cf6b
RH
6674 return true;
6675}
a2103582
RH
6676
6677/*
6678 * Move Prefix
6679 *
6680 * TODO: The implementation so far could handle predicated merging movprfx.
6681 * The helper functions as written take an extra source register to
6682 * use in the operation, but the result is only written when predication
6683 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
6684 * to allow the final write back to the destination to be unconditional.
6685 * For predicated zeroing movprfx, we need to rearrange the helpers to
6686 * allow the final write back to zero inactives.
6687 *
6688 * In the meantime, just emit the moves.
6689 */
6690
3a7be554 6691static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
a2103582
RH
6692{
6693 return do_mov_z(s, a->rd, a->rn);
6694}
6695
3a7be554 6696static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
6697{
6698 if (sve_access_check(s)) {
6699 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
6700 }
6701 return true;
6702}
6703
3a7be554 6704static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
a2103582 6705{
60245996 6706 return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
a2103582 6707}
5dad1ba5
RH
6708
6709/*
6710 * SVE2 Integer Multiply - Unpredicated
6711 */
6712
6713static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
6714{
6715 if (!dc_isar_feature(aa64_sve2, s)) {
6716 return false;
6717 }
6718 if (sve_access_check(s)) {
6719 gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
6720 }
6721 return true;
6722}
6723
6724static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a,
6725 gen_helper_gvec_3 *fn)
6726{
6727 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6728 return false;
6729 }
6730 if (sve_access_check(s)) {
6731 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
6732 }
6733 return true;
6734}
6735
6736static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6737{
6738 static gen_helper_gvec_3 * const fns[4] = {
6739 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
6740 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
6741 };
6742 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6743}
6744
6745static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6746{
6747 static gen_helper_gvec_3 * const fns[4] = {
6748 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
6749 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
6750 };
6751 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6752}
6753
6754static bool trans_PMUL_zzz(DisasContext *s, arg_rrr_esz *a)
6755{
6756 return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b);
6757}
d4b1e59d 6758
169d7c58
RH
6759static bool trans_SQDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6760{
6761 static gen_helper_gvec_3 * const fns[4] = {
6762 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
6763 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
6764 };
6765 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6766}
6767
6768static bool trans_SQRDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6769{
6770 static gen_helper_gvec_3 * const fns[4] = {
6771 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
6772 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
6773 };
6774 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6775}
6776
d4b1e59d
RH
6777/*
6778 * SVE2 Integer - Predicated
6779 */
6780
6781static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
6782 gen_helper_gvec_4 *fn)
6783{
6784 if (!dc_isar_feature(aa64_sve2, s)) {
6785 return false;
6786 }
6787 return do_zpzz_ool(s, a, fn);
6788}
6789
6790static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6791{
6792 static gen_helper_gvec_4 * const fns[3] = {
6793 gen_helper_sve2_sadalp_zpzz_h,
6794 gen_helper_sve2_sadalp_zpzz_s,
6795 gen_helper_sve2_sadalp_zpzz_d,
6796 };
6797 if (a->esz == 0) {
6798 return false;
6799 }
6800 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6801}
6802
6803static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6804{
6805 static gen_helper_gvec_4 * const fns[3] = {
6806 gen_helper_sve2_uadalp_zpzz_h,
6807 gen_helper_sve2_uadalp_zpzz_s,
6808 gen_helper_sve2_uadalp_zpzz_d,
6809 };
6810 if (a->esz == 0) {
6811 return false;
6812 }
6813 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6814}
db366da8
RH
6815
6816/*
6817 * SVE2 integer unary operations (predicated)
6818 */
6819
6820static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a,
6821 gen_helper_gvec_3 *fn)
6822{
6823 if (!dc_isar_feature(aa64_sve2, s)) {
6824 return false;
6825 }
6826 return do_zpz_ool(s, a, fn);
6827}
6828
6829static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a)
6830{
6831 if (a->esz != 2) {
6832 return false;
6833 }
6834 return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s);
6835}
6836
6837static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a)
6838{
6839 if (a->esz != 2) {
6840 return false;
6841 }
6842 return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s);
6843}
6844
6845static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a)
6846{
6847 static gen_helper_gvec_3 * const fns[4] = {
6848 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
6849 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
6850 };
6851 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6852}
6853
6854static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a)
6855{
6856 static gen_helper_gvec_3 * const fns[4] = {
6857 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
6858 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
6859 };
6860 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6861}
45d9503d
RH
6862
6863#define DO_SVE2_ZPZZ(NAME, name) \
6864static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
6865{ \
6866 static gen_helper_gvec_4 * const fns[4] = { \
6867 gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
6868 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
6869 }; \
6870 return do_sve2_zpzz_ool(s, a, fns[a->esz]); \
6871}
6872
6873DO_SVE2_ZPZZ(SQSHL, sqshl)
6874DO_SVE2_ZPZZ(SQRSHL, sqrshl)
6875DO_SVE2_ZPZZ(SRSHL, srshl)
6876
6877DO_SVE2_ZPZZ(UQSHL, uqshl)
6878DO_SVE2_ZPZZ(UQRSHL, uqrshl)
6879DO_SVE2_ZPZZ(URSHL, urshl)
a47dc220
RH
6880
6881DO_SVE2_ZPZZ(SHADD, shadd)
6882DO_SVE2_ZPZZ(SRHADD, srhadd)
6883DO_SVE2_ZPZZ(SHSUB, shsub)
6884
6885DO_SVE2_ZPZZ(UHADD, uhadd)
6886DO_SVE2_ZPZZ(URHADD, urhadd)
6887DO_SVE2_ZPZZ(UHSUB, uhsub)
8597dc8b
RH
6888
6889DO_SVE2_ZPZZ(ADDP, addp)
6890DO_SVE2_ZPZZ(SMAXP, smaxp)
6891DO_SVE2_ZPZZ(UMAXP, umaxp)
6892DO_SVE2_ZPZZ(SMINP, sminp)
6893DO_SVE2_ZPZZ(UMINP, uminp)
4f07fbeb
RH
6894
6895DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
6896DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
6897DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
6898DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
6899DO_SVE2_ZPZZ(SUQADD, suqadd)
6900DO_SVE2_ZPZZ(USQADD, usqadd)
0ce1dda8
RH
6901
6902/*
6903 * SVE2 Widening Integer Arithmetic
6904 */
6905
6906static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a,
6907 gen_helper_gvec_3 *fn, int data)
6908{
6909 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6910 return false;
6911 }
6912 if (sve_access_check(s)) {
6913 unsigned vsz = vec_full_reg_size(s);
6914 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
6915 vec_full_reg_offset(s, a->rn),
6916 vec_full_reg_offset(s, a->rm),
6917 vsz, vsz, data, fn);
6918 }
6919 return true;
6920}
6921
6922#define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \
6923static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
6924{ \
6925 static gen_helper_gvec_3 * const fns[4] = { \
6926 NULL, gen_helper_sve2_##name##_h, \
6927 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
6928 }; \
6929 return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1); \
6930}
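
/*
 * For reference, an expansion of the macro above (not present verbatim in
 * the file): DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false) becomes
 *
 *   static bool trans_SADDLB(DisasContext *s, arg_rrr_esz *a)
 *   {
 *       static gen_helper_gvec_3 * const fns[4] = {
 *           NULL, gen_helper_sve2_saddl_h,
 *           gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
 *       };
 *       return do_sve2_zzw_ool(s, a, fns[a->esz], (false << 1) | false);
 *   }
 *
 * SEL1 and SEL2 pick the bottom (false) or top (true) half-width elements
 * of the two source operands and travel to the helper packed into the
 * data argument.
 */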
6931
6932DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false)
6933DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false)
6934DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false)
6935
6936DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false)
6937DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false)
6938DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false)
6939
6940DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true)
6941DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true)
6942DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true)
6943
6944DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true)
6945DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true)
6946DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true)
daec426b
RH
6947
6948DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true)
6949DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true)
6950DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false)
81fccf09 6951
69ccc099
RH
6952DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false)
6953DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true)
6954
6955DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false)
6956DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true)
6957
6958DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false)
6959DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true)
6960
2df3ca55
RH
6961static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1)
6962{
6963 static gen_helper_gvec_3 * const fns[4] = {
6964 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6965 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6966 };
6967 return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1);
6968}
6969
6970static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a)
6971{
6972 return do_eor_tb(s, a, false);
6973}
6974
6975static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a)
6976{
6977 return do_eor_tb(s, a, true);
6978}
6979
e3a56131
RH
6980static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6981{
6982 static gen_helper_gvec_3 * const fns[4] = {
6983 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6984 NULL, gen_helper_sve2_pmull_d,
6985 };
6986 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
6987 return false;
6988 }
6989 return do_sve2_zzw_ool(s, a, fns[a->esz], sel);
6990}
6991
6992static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a)
6993{
6994 return do_trans_pmull(s, a, false);
6995}
6996
6997static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a)
6998{
6999 return do_trans_pmull(s, a, true);
7000}
7001
81fccf09
RH
7002#define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \
7003static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
7004{ \
7005 static gen_helper_gvec_3 * const fns[4] = { \
7006 NULL, gen_helper_sve2_##name##_h, \
7007 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
7008 }; \
7009 return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2); \
7010}
7011
7012DO_SVE2_ZZZ_WTB(SADDWB, saddw, false)
7013DO_SVE2_ZZZ_WTB(SADDWT, saddw, true)
7014DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false)
7015DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true)
7016
7017DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false)
7018DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true)
7019DO_SVE2_ZZZ_WTB(USUBWB, usubw, false)
7020DO_SVE2_ZZZ_WTB(USUBWT, usubw, true)
4269fef1
RH
7021
7022static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
7023{
7024 int top = imm & 1;
7025 int shl = imm >> 1;
7026 int halfbits = 4 << vece;
7027
7028 if (top) {
7029 if (shl == halfbits) {
7030 TCGv_vec t = tcg_temp_new_vec_matching(d);
7031 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
7032 tcg_gen_and_vec(vece, d, n, t);
7033 tcg_temp_free_vec(t);
7034 } else {
7035 tcg_gen_sari_vec(vece, d, n, halfbits);
7036 tcg_gen_shli_vec(vece, d, d, shl);
7037 }
7038 } else {
7039 tcg_gen_shli_vec(vece, d, n, halfbits);
7040 tcg_gen_sari_vec(vece, d, d, halfbits - shl);
7041 }
7042}
7043
7044static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
7045{
7046 int halfbits = 4 << vece;
7047 int top = imm & 1;
7048 int shl = (imm >> 1);
7049 int shift;
7050 uint64_t mask;
7051
7052 mask = MAKE_64BIT_MASK(0, halfbits);
7053 mask <<= shl;
7054 mask = dup_const(vece, mask);
7055
7056 shift = shl - top * halfbits;
7057 if (shift < 0) {
7058 tcg_gen_shri_i64(d, n, -shift);
7059 } else {
7060 tcg_gen_shli_i64(d, n, shift);
7061 }
7062 tcg_gen_andi_i64(d, d, mask);
7063}
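
/*
 * Illustrative sketch, not part of the original file: per-element view of
 * gen_sshll_vec() and gen_ushll_i64() above.  Each 2*halfbits-wide
 * destination element takes the low (top == 0) or high (top == 1)
 * halfbits of the source container, sign- or zero-extends it, and shifts
 * it left by shl.  The sketch_* name is hypothetical.
 */
static uint64_t sketch_shll_element(uint64_t n, int halfbits,
                                    int top, int shl, bool uns)
{
    uint64_t half = (top ? n >> halfbits : n) & ((1ull << halfbits) - 1);
    uint64_t dmask = halfbits == 32 ? ~0ull : (1ull << (2 * halfbits)) - 1;

    if (!uns && (half & (1ull << (halfbits - 1)))) {
        /* Sign-extend the half-width source value. */
        half |= ~((1ull << halfbits) - 1);
    }
    return (half << shl) & dmask;
}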
7064
7065static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7066{
7067 gen_ushll_i64(MO_16, d, n, imm);
7068}
7069
7070static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7071{
7072 gen_ushll_i64(MO_32, d, n, imm);
7073}
7074
7075static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7076{
7077 gen_ushll_i64(MO_64, d, n, imm);
7078}
7079
7080static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
7081{
7082 int halfbits = 4 << vece;
7083 int top = imm & 1;
7084 int shl = imm >> 1;
7085
7086 if (top) {
7087 if (shl == halfbits) {
7088 TCGv_vec t = tcg_temp_new_vec_matching(d);
7089 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
7090 tcg_gen_and_vec(vece, d, n, t);
7091 tcg_temp_free_vec(t);
7092 } else {
7093 tcg_gen_shri_vec(vece, d, n, halfbits);
7094 tcg_gen_shli_vec(vece, d, d, shl);
7095 }
7096 } else {
7097 if (shl == 0) {
7098 TCGv_vec t = tcg_temp_new_vec_matching(d);
7099 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7100 tcg_gen_and_vec(vece, d, n, t);
7101 tcg_temp_free_vec(t);
7102 } else {
7103 tcg_gen_shli_vec(vece, d, n, halfbits);
7104 tcg_gen_shri_vec(vece, d, d, halfbits - shl);
7105 }
7106 }
7107}
7108
7109static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
7110 bool sel, bool uns)
7111{
7112 static const TCGOpcode sshll_list[] = {
7113 INDEX_op_shli_vec, INDEX_op_sari_vec, 0
7114 };
7115 static const TCGOpcode ushll_list[] = {
7116 INDEX_op_shli_vec, INDEX_op_shri_vec, 0
7117 };
7118 static const GVecGen2i ops[2][3] = {
7119 { { .fniv = gen_sshll_vec,
7120 .opt_opc = sshll_list,
7121 .fno = gen_helper_sve2_sshll_h,
7122 .vece = MO_16 },
7123 { .fniv = gen_sshll_vec,
7124 .opt_opc = sshll_list,
7125 .fno = gen_helper_sve2_sshll_s,
7126 .vece = MO_32 },
7127 { .fniv = gen_sshll_vec,
7128 .opt_opc = sshll_list,
7129 .fno = gen_helper_sve2_sshll_d,
7130 .vece = MO_64 } },
7131 { { .fni8 = gen_ushll16_i64,
7132 .fniv = gen_ushll_vec,
7133 .opt_opc = ushll_list,
7134 .fno = gen_helper_sve2_ushll_h,
7135 .vece = MO_16 },
7136 { .fni8 = gen_ushll32_i64,
7137 .fniv = gen_ushll_vec,
7138 .opt_opc = ushll_list,
7139 .fno = gen_helper_sve2_ushll_s,
7140 .vece = MO_32 },
7141 { .fni8 = gen_ushll64_i64,
7142 .fniv = gen_ushll_vec,
7143 .opt_opc = ushll_list,
7144 .fno = gen_helper_sve2_ushll_d,
7145 .vece = MO_64 } },
7146 };
7147
7148 if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
7149 return false;
7150 }
7151 if (sve_access_check(s)) {
7152 unsigned vsz = vec_full_reg_size(s);
7153 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
7154 vec_full_reg_offset(s, a->rn),
7155 vsz, vsz, (a->imm << 1) | sel,
7156 &ops[uns][a->esz]);
7157 }
7158 return true;
7159}
7160
7161static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
7162{
7163 return do_sve2_shll_tb(s, a, false, false);
7164}
7165
7166static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
7167{
7168 return do_sve2_shll_tb(s, a, true, false);
7169}
7170
7171static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
7172{
7173 return do_sve2_shll_tb(s, a, false, true);
7174}
7175
7176static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
7177{
7178 return do_sve2_shll_tb(s, a, true, true);
7179}
cb9c33b8
RH
7180
7181static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a)
7182{
7183 static gen_helper_gvec_3 * const fns[4] = {
7184 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
7185 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
7186 };
7187 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7188 return false;
7189 }
7190 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7191}
7192
7193static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a)
7194{
7195 static gen_helper_gvec_3 * const fns[4] = {
7196 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
7197 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
7198 };
7199 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7200 return false;
7201 }
7202 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7203}
7204
7205static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a)
7206{
7207 static gen_helper_gvec_3 * const fns[4] = {
7208 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
7209 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
7210 };
7211 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7212 return false;
7213 }
7214 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7215}
ed4a6387
RH
7216
7217static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot)
7218{
7219 static gen_helper_gvec_3 * const fns[2][4] = {
7220 { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
7221 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d },
7222 { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
7223 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d },
7224 };
7225 return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot);
7226}
7227
7228static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a)
7229{
7230 return do_cadd(s, a, false, false);
7231}
7232
7233static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a)
7234{
7235 return do_cadd(s, a, false, true);
7236}
7237
7238static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a)
7239{
7240 return do_cadd(s, a, true, false);
7241}
7242
7243static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
7244{
7245 return do_cadd(s, a, true, true);
7246}
38650638
RH
7247
7248static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
7249 gen_helper_gvec_4 *fn, int data)
7250{
7251 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
7252 return false;
7253 }
7254 if (sve_access_check(s)) {
7255 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
7256 }
7257 return true;
7258}
7259
7260static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
7261{
7262 static gen_helper_gvec_4 * const fns[2][4] = {
7263 { NULL, gen_helper_sve2_sabal_h,
7264 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d },
7265 { NULL, gen_helper_sve2_uabal_h,
7266 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d },
7267 };
7268 return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel);
7269}
7270
7271static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
7272{
7273 return do_abal(s, a, false, false);
7274}
7275
7276static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
7277{
7278 return do_abal(s, a, false, true);
7279}
7280
7281static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
7282{
7283 return do_abal(s, a, true, false);
7284}
7285
7286static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
7287{
7288 return do_abal(s, a, true, true);
7289}
b8295dfb
RH
7290
7291static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
7292{
7293 static gen_helper_gvec_4 * const fns[2] = {
7294 gen_helper_sve2_adcl_s,
7295 gen_helper_sve2_adcl_d,
7296 };
7297 /*
7298 * Note that in this case the ESZ field encodes both size and sign.
7299 * Split out 'subtract' into bit 1 of the data field for the helper.
7300 */
7301 return do_sve2_zzzz_ool(s, a, fns[a->esz & 1], (a->esz & 2) | sel);
7302}
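
/*
 * Illustrative sketch, not part of the original file: how do_adcl() above
 * unpacks the overloaded ESZ field, using hypothetical sketch_* names.
 */
static inline void sketch_decode_adcl(int esz, bool sel,
                                      bool *is_64bit, int *data)
{
    *is_64bit = esz & 1;        /* bit 0 selects 32-bit vs 64-bit elements */
    *data = (esz & 2) | sel;    /* bit 1 carries 'subtract'; bit 0 is T/B */
}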
7303
7304static bool trans_ADCLB(DisasContext *s, arg_rrrr_esz *a)
7305{
7306 return do_adcl(s, a, false);
7307}
7308
7309static bool trans_ADCLT(DisasContext *s, arg_rrrr_esz *a)
7310{
7311 return do_adcl(s, a, true);
7312}
a7e3a90e
RH
7313
7314static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
7315{
7316 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
7317 return false;
7318 }
7319 if (sve_access_check(s)) {
7320 unsigned vsz = vec_full_reg_size(s);
7321 unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
7322 unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
7323 fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
7324 }
7325 return true;
7326}
7327
7328static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
7329{
7330 return do_sve2_fn2i(s, a, gen_gvec_ssra);
7331}
7332
7333static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
7334{
7335 return do_sve2_fn2i(s, a, gen_gvec_usra);
7336}
7337
7338static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
7339{
7340 return do_sve2_fn2i(s, a, gen_gvec_srsra);
7341}
7342
7343static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
7344{
7345 return do_sve2_fn2i(s, a, gen_gvec_ursra);
7346}
fc12b46a
RH
7347
7348static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
7349{
7350 return do_sve2_fn2i(s, a, gen_gvec_sri);
7351}
7352
7353static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
7354{
7355 return do_sve2_fn2i(s, a, gen_gvec_sli);
7356}
289a1797
RH
7357
7358static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
7359{
7360 if (!dc_isar_feature(aa64_sve2, s)) {
7361 return false;
7362 }
7363 if (sve_access_check(s)) {
7364 gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
7365 }
7366 return true;
7367}
7368
7369static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
7370{
7371 return do_sve2_fn_zzz(s, a, gen_gvec_saba);
7372}
7373
7374static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
7375{
7376 return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
7377}
5ff2838d
RH
7378
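/*
 * Common expander for the narrowing extract group (SQXTNB/T, UQXTNB/T,
 * SQXTUNB/T).  The GVecGen2 table is indexed by a->esz, and the vector
 * ops operate on the wide source elements (vece is the source size).
 */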
7379static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
7380 const GVecGen2 ops[3])
7381{
7382 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
7383 !dc_isar_feature(aa64_sve2, s)) {
7384 return false;
7385 }
7386 if (sve_access_check(s)) {
7387 unsigned vsz = vec_full_reg_size(s);
7388 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
7389 vec_full_reg_offset(s, a->rn),
7390 vsz, vsz, &ops[a->esz]);
7391 }
7392 return true;
7393}
7394
7395static const TCGOpcode sqxtn_list[] = {
7396 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
7397};
7398
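/*
 * Clamp each wide element to the signed range of the half-width
 * result, then mask so that only the low half of each element
 * survives; the high halves of the destination become zero.
 */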
7399static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7400{
7401 TCGv_vec t = tcg_temp_new_vec_matching(d);
7402 int halfbits = 4 << vece;
7403 int64_t mask = (1ull << halfbits) - 1;
7404 int64_t min = -1ull << (halfbits - 1);
7405 int64_t max = -min - 1;
7406
7407 tcg_gen_dupi_vec(vece, t, min);
7408 tcg_gen_smax_vec(vece, d, n, t);
7409 tcg_gen_dupi_vec(vece, t, max);
7410 tcg_gen_smin_vec(vece, d, d, t);
7411 tcg_gen_dupi_vec(vece, t, mask);
7412 tcg_gen_and_vec(vece, d, d, t);
7413 tcg_temp_free_vec(t);
7414}
7415
7416static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
7417{
7418 static const GVecGen2 ops[3] = {
7419 { .fniv = gen_sqxtnb_vec,
7420 .opt_opc = sqxtn_list,
7421 .fno = gen_helper_sve2_sqxtnb_h,
7422 .vece = MO_16 },
7423 { .fniv = gen_sqxtnb_vec,
7424 .opt_opc = sqxtn_list,
7425 .fno = gen_helper_sve2_sqxtnb_s,
7426 .vece = MO_32 },
7427 { .fniv = gen_sqxtnb_vec,
7428 .opt_opc = sqxtn_list,
7429 .fno = gen_helper_sve2_sqxtnb_d,
7430 .vece = MO_64 },
7431 };
7432 return do_sve2_narrow_extract(s, a, ops);
7433}
7434
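/*
 * As above, but place the saturated result in the high half of each
 * element: shift left by halfbits, then use bitsel with the low-half
 * mask to preserve the existing low halves of the destination
 * (hence .load_dest in the ops below).
 */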
7435static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7436{
7437 TCGv_vec t = tcg_temp_new_vec_matching(d);
7438 int halfbits = 4 << vece;
7439 int64_t mask = (1ull << halfbits) - 1;
7440 int64_t min = -1ull << (halfbits - 1);
7441 int64_t max = -min - 1;
7442
7443 tcg_gen_dupi_vec(vece, t, min);
7444 tcg_gen_smax_vec(vece, n, n, t);
7445 tcg_gen_dupi_vec(vece, t, max);
7446 tcg_gen_smin_vec(vece, n, n, t);
7447 tcg_gen_shli_vec(vece, n, n, halfbits);
7448 tcg_gen_dupi_vec(vece, t, mask);
7449 tcg_gen_bitsel_vec(vece, d, t, d, n);
7450 tcg_temp_free_vec(t);
7451}
7452
7453static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
7454{
7455 static const GVecGen2 ops[3] = {
7456 { .fniv = gen_sqxtnt_vec,
7457 .opt_opc = sqxtn_list,
7458 .load_dest = true,
7459 .fno = gen_helper_sve2_sqxtnt_h,
7460 .vece = MO_16 },
7461 { .fniv = gen_sqxtnt_vec,
7462 .opt_opc = sqxtn_list,
7463 .load_dest = true,
7464 .fno = gen_helper_sve2_sqxtnt_s,
7465 .vece = MO_32 },
7466 { .fniv = gen_sqxtnt_vec,
7467 .opt_opc = sqxtn_list,
7468 .load_dest = true,
7469 .fno = gen_helper_sve2_sqxtnt_d,
7470 .vece = MO_64 },
7471 };
7472 return do_sve2_narrow_extract(s, a, ops);
7473}
7474
7475static const TCGOpcode uqxtn_list[] = {
7476 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
7477};
7478
7479static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7480{
7481 TCGv_vec t = tcg_temp_new_vec_matching(d);
7482 int halfbits = 4 << vece;
7483 int64_t max = (1ull << halfbits) - 1;
7484
7485 tcg_gen_dupi_vec(vece, t, max);
7486 tcg_gen_umin_vec(vece, d, n, t);
7487 tcg_temp_free_vec(t);
7488}
7489
7490static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
7491{
7492 static const GVecGen2 ops[3] = {
7493 { .fniv = gen_uqxtnb_vec,
7494 .opt_opc = uqxtn_list,
7495 .fno = gen_helper_sve2_uqxtnb_h,
7496 .vece = MO_16 },
7497 { .fniv = gen_uqxtnb_vec,
7498 .opt_opc = uqxtn_list,
7499 .fno = gen_helper_sve2_uqxtnb_s,
7500 .vece = MO_32 },
7501 { .fniv = gen_uqxtnb_vec,
7502 .opt_opc = uqxtn_list,
7503 .fno = gen_helper_sve2_uqxtnb_d,
7504 .vece = MO_64 },
7505 };
7506 return do_sve2_narrow_extract(s, a, ops);
7507}
7508
7509static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7510{
7511 TCGv_vec t = tcg_temp_new_vec_matching(d);
7512 int halfbits = 4 << vece;
7513 int64_t max = (1ull << halfbits) - 1;
7514
7515 tcg_gen_dupi_vec(vece, t, max);
7516 tcg_gen_umin_vec(vece, n, n, t);
7517 tcg_gen_shli_vec(vece, n, n, halfbits);
7518 tcg_gen_bitsel_vec(vece, d, t, d, n);
7519 tcg_temp_free_vec(t);
7520}
7521
7522static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
7523{
7524 static const GVecGen2 ops[3] = {
7525 { .fniv = gen_uqxtnt_vec,
7526 .opt_opc = uqxtn_list,
7527 .load_dest = true,
7528 .fno = gen_helper_sve2_uqxtnt_h,
7529 .vece = MO_16 },
7530 { .fniv = gen_uqxtnt_vec,
7531 .opt_opc = uqxtn_list,
7532 .load_dest = true,
7533 .fno = gen_helper_sve2_uqxtnt_s,
7534 .vece = MO_32 },
7535 { .fniv = gen_uqxtnt_vec,
7536 .opt_opc = uqxtn_list,
7537 .load_dest = true,
7538 .fno = gen_helper_sve2_uqxtnt_d,
7539 .vece = MO_64 },
7540 };
7541 return do_sve2_narrow_extract(s, a, ops);
7542}
7543
7544static const TCGOpcode sqxtun_list[] = {
7545 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
7546};
7547
7548static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7549{
7550 TCGv_vec t = tcg_temp_new_vec_matching(d);
7551 int halfbits = 4 << vece;
7552 int64_t max = (1ull << halfbits) - 1;
7553
7554 tcg_gen_dupi_vec(vece, t, 0);
7555 tcg_gen_smax_vec(vece, d, n, t);
7556 tcg_gen_dupi_vec(vece, t, max);
7557 tcg_gen_umin_vec(vece, d, d, t);
7558 tcg_temp_free_vec(t);
7559}
7560
7561static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
7562{
7563 static const GVecGen2 ops[3] = {
7564 { .fniv = gen_sqxtunb_vec,
7565 .opt_opc = sqxtun_list,
7566 .fno = gen_helper_sve2_sqxtunb_h,
7567 .vece = MO_16 },
7568 { .fniv = gen_sqxtunb_vec,
7569 .opt_opc = sqxtun_list,
7570 .fno = gen_helper_sve2_sqxtunb_s,
7571 .vece = MO_32 },
7572 { .fniv = gen_sqxtunb_vec,
7573 .opt_opc = sqxtun_list,
7574 .fno = gen_helper_sve2_sqxtunb_d,
7575 .vece = MO_64 },
7576 };
7577 return do_sve2_narrow_extract(s, a, ops);
7578}
7579
7580static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7581{
7582 TCGv_vec t = tcg_temp_new_vec_matching(d);
7583 int halfbits = 4 << vece;
7584 int64_t max = (1ull << halfbits) - 1;
7585
7586 tcg_gen_dupi_vec(vece, t, 0);
7587 tcg_gen_smax_vec(vece, n, n, t);
7588 tcg_gen_dupi_vec(vece, t, max);
7589 tcg_gen_umin_vec(vece, n, n, t);
7590 tcg_gen_shli_vec(vece, n, n, halfbits);
7591 tcg_gen_bitsel_vec(vece, d, t, d, n);
7592 tcg_temp_free_vec(t);
7593}
7594
7595static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
7596{
7597 static const GVecGen2 ops[3] = {
7598 { .fniv = gen_sqxtunt_vec,
7599 .opt_opc = sqxtun_list,
7600 .load_dest = true,
7601 .fno = gen_helper_sve2_sqxtunt_h,
7602 .vece = MO_16 },
7603 { .fniv = gen_sqxtunt_vec,
7604 .opt_opc = sqxtun_list,
7605 .load_dest = true,
7606 .fno = gen_helper_sve2_sqxtunt_s,
7607 .vece = MO_32 },
7608 { .fniv = gen_sqxtunt_vec,
7609 .opt_opc = sqxtun_list,
7610 .load_dest = true,
7611 .fno = gen_helper_sve2_sqxtunt_d,
7612 .vece = MO_64 },
7613 };
7614 return do_sve2_narrow_extract(s, a, ops);
46d111b2
RH
7615}
7616
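/*
 * Common expander for the shift-right-narrow group.  The shift count
 * has already been constrained by the decode to 1..(8 << esz), i.e.
 * half the width of the elements the vector ops operate on.
 */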
7617static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
7618 const GVecGen2i ops[3])
7619{
7620 if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
7621 return false;
7622 }
7623 assert(a->imm > 0 && a->imm <= (8 << a->esz));
7624 if (sve_access_check(s)) {
7625 unsigned vsz = vec_full_reg_size(s);
7626 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
7627 vec_full_reg_offset(s, a->rn),
7628 vsz, vsz, a->imm, &ops[a->esz]);
7629 }
7630 return true;
7631}
7632
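/*
 * SHRNB via 64-bit integer ops: shift the whole chunk right and mask
 * each element down to its low half; dup_const replicates the
 * half-element mask across the 64-bit chunk.
 */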
7633static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
7634{
7635 int halfbits = 4 << vece;
7636 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
7637
7638 tcg_gen_shri_i64(d, n, shr);
7639 tcg_gen_andi_i64(d, d, mask);
7640}
7641
7642static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7643{
7644 gen_shrnb_i64(MO_16, d, n, shr);
7645}
7646
7647static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7648{
7649 gen_shrnb_i64(MO_32, d, n, shr);
7650}
7651
7652static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7653{
7654 gen_shrnb_i64(MO_64, d, n, shr);
7655}
7656
7657static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
7658{
7659 TCGv_vec t = tcg_temp_new_vec_matching(d);
7660 int halfbits = 4 << vece;
7661 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
7662
7663 tcg_gen_shri_vec(vece, n, n, shr);
7664 tcg_gen_dupi_vec(vece, t, mask);
7665 tcg_gen_and_vec(vece, d, n, t);
7666 tcg_temp_free_vec(t);
7667}
7668
7669static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
7670{
7671 static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
7672 static const GVecGen2i ops[3] = {
7673 { .fni8 = gen_shrnb16_i64,
7674 .fniv = gen_shrnb_vec,
7675 .opt_opc = vec_list,
7676 .fno = gen_helper_sve2_shrnb_h,
7677 .vece = MO_16 },
7678 { .fni8 = gen_shrnb32_i64,
7679 .fniv = gen_shrnb_vec,
7680 .opt_opc = vec_list,
7681 .fno = gen_helper_sve2_shrnb_s,
7682 .vece = MO_32 },
7683 { .fni8 = gen_shrnb64_i64,
7684 .fniv = gen_shrnb_vec,
7685 .opt_opc = vec_list,
7686 .fno = gen_helper_sve2_shrnb_d,
7687 .vece = MO_64 },
7688 };
7689 return do_sve2_shr_narrow(s, a, ops);
7690}
7691
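/*
 * SHRNT via 64-bit integer ops: shifting left by (halfbits - shr)
 * lands bits [shr, shr + halfbits) of each element in its high half;
 * the two ands then merge that with the preserved low halves of the
 * destination.
 */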
7692static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
7693{
7694 int halfbits = 4 << vece;
7695 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
7696
7697 tcg_gen_shli_i64(n, n, halfbits - shr);
7698 tcg_gen_andi_i64(n, n, ~mask);
7699 tcg_gen_andi_i64(d, d, mask);
7700 tcg_gen_or_i64(d, d, n);
7701}
7702
7703static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7704{
7705 gen_shrnt_i64(MO_16, d, n, shr);
7706}
7707
7708static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7709{
7710 gen_shrnt_i64(MO_32, d, n, shr);
7711}
7712
7713static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7714{
7715 tcg_gen_shri_i64(n, n, shr);
7716 tcg_gen_deposit_i64(d, d, n, 32, 32);
7717}
7718
7719static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
7720{
7721 TCGv_vec t = tcg_temp_new_vec_matching(d);
7722 int halfbits = 4 << vece;
7723 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
7724
7725 tcg_gen_shli_vec(vece, n, n, halfbits - shr);
7726 tcg_gen_dupi_vec(vece, t, mask);
7727 tcg_gen_bitsel_vec(vece, d, t, d, n);
7728 tcg_temp_free_vec(t);
7729}
7730
7731static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
7732{
7733 static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
7734 static const GVecGen2i ops[3] = {
7735 { .fni8 = gen_shrnt16_i64,
7736 .fniv = gen_shrnt_vec,
7737 .opt_opc = vec_list,
7738 .load_dest = true,
7739 .fno = gen_helper_sve2_shrnt_h,
7740 .vece = MO_16 },
7741 { .fni8 = gen_shrnt32_i64,
7742 .fniv = gen_shrnt_vec,
7743 .opt_opc = vec_list,
7744 .load_dest = true,
7745 .fno = gen_helper_sve2_shrnt_s,
7746 .vece = MO_32 },
7747 { .fni8 = gen_shrnt64_i64,
7748 .fniv = gen_shrnt_vec,
7749 .opt_opc = vec_list,
7750 .load_dest = true,
7751 .fno = gen_helper_sve2_shrnt_d,
7752 .vece = MO_64 },
7753 };
7754 return do_sve2_shr_narrow(s, a, ops);
7755}
7756
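/* The rounding narrowing forms are implemented only via out-of-line helpers. */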
7757static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
7758{
7759 static const GVecGen2i ops[3] = {
7760 { .fno = gen_helper_sve2_rshrnb_h },
7761 { .fno = gen_helper_sve2_rshrnb_s },
7762 { .fno = gen_helper_sve2_rshrnb_d },
7763 };
7764 return do_sve2_shr_narrow(s, a, ops);
7765}
7766
7767static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
7768{
7769 static const GVecGen2i ops[3] = {
7770 { .fno = gen_helper_sve2_rshrnt_h },
7771 { .fno = gen_helper_sve2_rshrnt_s },
7772 { .fno = gen_helper_sve2_rshrnt_d },
7773 };
7774 return do_sve2_shr_narrow(s, a, ops);
81fd3e6e
RH
7775}
7776
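/*
 * SQSHRUNB: arithmetic shift right, then clamp the signed result to
 * the unsigned half-width range; the clamp also guarantees that the
 * high half of each destination element is zero.
 */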
7777static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
7778 TCGv_vec n, int64_t shr)
7779{
7780 TCGv_vec t = tcg_temp_new_vec_matching(d);
7781 int halfbits = 4 << vece;
7782
7783 tcg_gen_sari_vec(vece, n, n, shr);
7784 tcg_gen_dupi_vec(vece, t, 0);
7785 tcg_gen_smax_vec(vece, n, n, t);
7786 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7787 tcg_gen_umin_vec(vece, d, n, t);
7788 tcg_temp_free_vec(t);
7789}
7790
7791static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
7792{
7793 static const TCGOpcode vec_list[] = {
7794 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7795 };
7796 static const GVecGen2i ops[3] = {
7797 { .fniv = gen_sqshrunb_vec,
7798 .opt_opc = vec_list,
7799 .fno = gen_helper_sve2_sqshrunb_h,
7800 .vece = MO_16 },
7801 { .fniv = gen_sqshrunb_vec,
7802 .opt_opc = vec_list,
7803 .fno = gen_helper_sve2_sqshrunb_s,
7804 .vece = MO_32 },
7805 { .fniv = gen_sqshrunb_vec,
7806 .opt_opc = vec_list,
7807 .fno = gen_helper_sve2_sqshrunb_d,
7808 .vece = MO_64 },
7809 };
7810 return do_sve2_shr_narrow(s, a, ops);
7811}
7812
7813static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
7814 TCGv_vec n, int64_t shr)
7815{
7816 TCGv_vec t = tcg_temp_new_vec_matching(d);
7817 int halfbits = 4 << vece;
7818
7819 tcg_gen_sari_vec(vece, n, n, shr);
7820 tcg_gen_dupi_vec(vece, t, 0);
7821 tcg_gen_smax_vec(vece, n, n, t);
7822 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7823 tcg_gen_umin_vec(vece, n, n, t);
7824 tcg_gen_shli_vec(vece, n, n, halfbits);
7825 tcg_gen_bitsel_vec(vece, d, t, d, n);
7826 tcg_temp_free_vec(t);
7827}
7828
7829static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
7830{
7831 static const TCGOpcode vec_list[] = {
7832 INDEX_op_shli_vec, INDEX_op_sari_vec,
7833 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7834 };
7835 static const GVecGen2i ops[3] = {
7836 { .fniv = gen_sqshrunt_vec,
7837 .opt_opc = vec_list,
7838 .load_dest = true,
7839 .fno = gen_helper_sve2_sqshrunt_h,
7840 .vece = MO_16 },
7841 { .fniv = gen_sqshrunt_vec,
7842 .opt_opc = vec_list,
7843 .load_dest = true,
7844 .fno = gen_helper_sve2_sqshrunt_s,
7845 .vece = MO_32 },
7846 { .fniv = gen_sqshrunt_vec,
7847 .opt_opc = vec_list,
7848 .load_dest = true,
7849 .fno = gen_helper_sve2_sqshrunt_d,
7850 .vece = MO_64 },
7851 };
7852 return do_sve2_shr_narrow(s, a, ops);
7853}
7854
7855static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
7856{
7857 static const GVecGen2i ops[3] = {
7858 { .fno = gen_helper_sve2_sqrshrunb_h },
7859 { .fno = gen_helper_sve2_sqrshrunb_s },
7860 { .fno = gen_helper_sve2_sqrshrunb_d },
7861 };
7862 return do_sve2_shr_narrow(s, a, ops);
7863}
7864
7865static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
7866{
7867 static const GVecGen2i ops[3] = {
7868 { .fno = gen_helper_sve2_sqrshrunt_h },
7869 { .fno = gen_helper_sve2_sqrshrunt_s },
7870 { .fno = gen_helper_sve2_sqrshrunt_d },
7871 };
7872 return do_sve2_shr_narrow(s, a, ops);
c13418da
RH
7873}
7874
743bb147
RH
7875static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
7876 TCGv_vec n, int64_t shr)
7877{
7878 TCGv_vec t = tcg_temp_new_vec_matching(d);
7879 int halfbits = 4 << vece;
7880 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7881 int64_t min = -max - 1;
7882
7883 tcg_gen_sari_vec(vece, n, n, shr);
7884 tcg_gen_dupi_vec(vece, t, min);
7885 tcg_gen_smax_vec(vece, n, n, t);
7886 tcg_gen_dupi_vec(vece, t, max);
7887 tcg_gen_smin_vec(vece, n, n, t);
7888 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7889 tcg_gen_and_vec(vece, d, n, t);
7890 tcg_temp_free_vec(t);
7891}
7892
7893static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
7894{
7895 static const TCGOpcode vec_list[] = {
7896 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7897 };
7898 static const GVecGen2i ops[3] = {
7899 { .fniv = gen_sqshrnb_vec,
7900 .opt_opc = vec_list,
7901 .fno = gen_helper_sve2_sqshrnb_h,
7902 .vece = MO_16 },
7903 { .fniv = gen_sqshrnb_vec,
7904 .opt_opc = vec_list,
7905 .fno = gen_helper_sve2_sqshrnb_s,
7906 .vece = MO_32 },
7907 { .fniv = gen_sqshrnb_vec,
7908 .opt_opc = vec_list,
7909 .fno = gen_helper_sve2_sqshrnb_d,
7910 .vece = MO_64 },
7911 };
7912 return do_sve2_shr_narrow(s, a, ops);
7913}
7914
7915static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
7916 TCGv_vec n, int64_t shr)
7917{
7918 TCGv_vec t = tcg_temp_new_vec_matching(d);
7919 int halfbits = 4 << vece;
7920 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7921 int64_t min = -max - 1;
7922
7923 tcg_gen_sari_vec(vece, n, n, shr);
7924 tcg_gen_dupi_vec(vece, t, min);
7925 tcg_gen_smax_vec(vece, n, n, t);
7926 tcg_gen_dupi_vec(vece, t, max);
7927 tcg_gen_smin_vec(vece, n, n, t);
7928 tcg_gen_shli_vec(vece, n, n, halfbits);
7929 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7930 tcg_gen_bitsel_vec(vece, d, t, d, n);
7931 tcg_temp_free_vec(t);
7932}
7933
7934static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
7935{
7936 static const TCGOpcode vec_list[] = {
7937 INDEX_op_shli_vec, INDEX_op_sari_vec,
7938 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7939 };
7940 static const GVecGen2i ops[3] = {
7941 { .fniv = gen_sqshrnt_vec,
7942 .opt_opc = vec_list,
7943 .load_dest = true,
7944 .fno = gen_helper_sve2_sqshrnt_h,
7945 .vece = MO_16 },
7946 { .fniv = gen_sqshrnt_vec,
7947 .opt_opc = vec_list,
7948 .load_dest = true,
7949 .fno = gen_helper_sve2_sqshrnt_s,
7950 .vece = MO_32 },
7951 { .fniv = gen_sqshrnt_vec,
7952 .opt_opc = vec_list,
7953 .load_dest = true,
7954 .fno = gen_helper_sve2_sqshrnt_d,
7955 .vece = MO_64 },
7956 };
7957 return do_sve2_shr_narrow(s, a, ops);
7958}
7959
7960static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
7961{
7962 static const GVecGen2i ops[3] = {
7963 { .fno = gen_helper_sve2_sqrshrnb_h },
7964 { .fno = gen_helper_sve2_sqrshrnb_s },
7965 { .fno = gen_helper_sve2_sqrshrnb_d },
7966 };
7967 return do_sve2_shr_narrow(s, a, ops);
7968}
7969
7970static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
7971{
7972 static const GVecGen2i ops[3] = {
7973 { .fno = gen_helper_sve2_sqrshrnt_h },
7974 { .fno = gen_helper_sve2_sqrshrnt_s },
7975 { .fno = gen_helper_sve2_sqrshrnt_d },
7976 };
7977 return do_sve2_shr_narrow(s, a, ops);
7978}
7979
c13418da
RH
7980static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
7981 TCGv_vec n, int64_t shr)
7982{
7983 TCGv_vec t = tcg_temp_new_vec_matching(d);
7984 int halfbits = 4 << vece;
7985
7986 tcg_gen_shri_vec(vece, n, n, shr);
7987 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7988 tcg_gen_umin_vec(vece, d, n, t);
7989 tcg_temp_free_vec(t);
7990}
7991
7992static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
7993{
7994 static const TCGOpcode vec_list[] = {
7995 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7996 };
7997 static const GVecGen2i ops[3] = {
7998 { .fniv = gen_uqshrnb_vec,
7999 .opt_opc = vec_list,
8000 .fno = gen_helper_sve2_uqshrnb_h,
8001 .vece = MO_16 },
8002 { .fniv = gen_uqshrnb_vec,
8003 .opt_opc = vec_list,
8004 .fno = gen_helper_sve2_uqshrnb_s,
8005 .vece = MO_32 },
8006 { .fniv = gen_uqshrnb_vec,
8007 .opt_opc = vec_list,
8008 .fno = gen_helper_sve2_uqshrnb_d,
8009 .vece = MO_64 },
8010 };
8011 return do_sve2_shr_narrow(s, a, ops);
8012}
8013
8014static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
8015 TCGv_vec n, int64_t shr)
8016{
8017 TCGv_vec t = tcg_temp_new_vec_matching(d);
8018 int halfbits = 4 << vece;
8019
8020 tcg_gen_shri_vec(vece, n, n, shr);
8021 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
8022 tcg_gen_umin_vec(vece, n, n, t);
8023 tcg_gen_shli_vec(vece, n, n, halfbits);
8024 tcg_gen_bitsel_vec(vece, d, t, d, n);
8025 tcg_temp_free_vec(t);
8026}
8027
8028static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
8029{
8030 static const TCGOpcode vec_list[] = {
8031 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
8032 };
8033 static const GVecGen2i ops[3] = {
8034 { .fniv = gen_uqshrnt_vec,
8035 .opt_opc = vec_list,
8036 .load_dest = true,
8037 .fno = gen_helper_sve2_uqshrnt_h,
8038 .vece = MO_16 },
8039 { .fniv = gen_uqshrnt_vec,
8040 .opt_opc = vec_list,
8041 .load_dest = true,
8042 .fno = gen_helper_sve2_uqshrnt_s,
8043 .vece = MO_32 },
8044 { .fniv = gen_uqshrnt_vec,
8045 .opt_opc = vec_list,
8046 .load_dest = true,
8047 .fno = gen_helper_sve2_uqshrnt_d,
8048 .vece = MO_64 },
8049 };
8050 return do_sve2_shr_narrow(s, a, ops);
8051}
8052
8053static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
8054{
8055 static const GVecGen2i ops[3] = {
8056 { .fno = gen_helper_sve2_uqrshrnb_h },
8057 { .fno = gen_helper_sve2_uqrshrnb_s },
8058 { .fno = gen_helper_sve2_uqrshrnb_d },
8059 };
8060 return do_sve2_shr_narrow(s, a, ops);
8061}
8062
8063static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
8064{
8065 static const GVecGen2i ops[3] = {
8066 { .fno = gen_helper_sve2_uqrshrnt_h },
8067 { .fno = gen_helper_sve2_uqrshrnt_s },
8068 { .fno = gen_helper_sve2_uqrshrnt_d },
8069 };
8070 return do_sve2_shr_narrow(s, a, ops);
5ff2838d 8071}
b87dbeeb 8072
40d5ea50
SL
8073#define DO_SVE2_ZZZ_NARROW(NAME, name) \
8074static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
8075{ \
8076 static gen_helper_gvec_3 * const fns[4] = { \
8077 NULL, gen_helper_sve2_##name##_h, \
8078 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
8079 }; \
8080 return do_sve2_zzz_ool(s, a, fns[a->esz]); \
8081}
8082
8083DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
8084DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
0ea3ff02
SL
8085DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
8086DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
40d5ea50 8087
c3cd6766
SL
8088DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
8089DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
e9443d10
SL
8090DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
8091DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
c3cd6766 8092
e0ae6ec3
SL
8093static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
8094 gen_helper_gvec_flags_4 *fn)
8095{
8096 if (!dc_isar_feature(aa64_sve2, s)) {
8097 return false;
8098 }
8099 return do_ppzz_flags(s, a, fn);
8100}
8101
8102#define DO_SVE2_PPZZ_MATCH(NAME, name) \
8103static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
8104{ \
8105 static gen_helper_gvec_flags_4 * const fns[4] = { \
8106 gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h, \
8107 NULL, NULL \
8108 }; \
8109 return do_sve2_ppzz_flags(s, a, fns[a->esz]); \
8110}
8111
8112DO_SVE2_PPZZ_MATCH(MATCH, match)
8113DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
8114
7d47ac94
SL
8115static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
8116{
8117 static gen_helper_gvec_4 * const fns[2] = {
8118 gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
8119 };
8120 if (a->esz < 2) {
8121 return false;
8122 }
8123 return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
8124}
8125
8126static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
8127{
8128 if (a->esz != 0) {
8129 return false;
8130 }
8131 return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
8132}
8133
b87dbeeb
SL
8134static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
8135 gen_helper_gvec_4_ptr *fn)
8136{
8137 if (!dc_isar_feature(aa64_sve2, s)) {
8138 return false;
8139 }
8140 return do_zpzz_fp(s, a, fn);
8141}
8142
8143#define DO_SVE2_ZPZZ_FP(NAME, name) \
8144static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
8145{ \
8146 static gen_helper_gvec_4_ptr * const fns[4] = { \
8147 NULL, gen_helper_sve2_##name##_zpzz_h, \
8148 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d \
8149 }; \
8150 return do_sve2_zpzz_fp(s, a, fns[a->esz]); \
8151}
8152
8153DO_SVE2_ZPZZ_FP(FADDP, faddp)
8154DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
8155DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
8156DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
8157DO_SVE2_ZPZZ_FP(FMINP, fminp)
bfc9307e
RH
8158
8159/*
8160 * SVE Integer Multiply-Add (unpredicated)
8161 */
8162
4f26756b
SL
8163static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
8164{
8165 gen_helper_gvec_4_ptr *fn;
8166
8167 switch (a->esz) {
8168 case MO_32:
8169 if (!dc_isar_feature(aa64_sve_f32mm, s)) {
8170 return false;
8171 }
8172 fn = gen_helper_fmmla_s;
8173 break;
8174 case MO_64:
8175 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
8176 return false;
8177 }
8178 fn = gen_helper_fmmla_d;
8179 break;
8180 default:
8181 return false;
8182 }
8183
8184 if (sve_access_check(s)) {
8185 unsigned vsz = vec_full_reg_size(s);
8186 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8187 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8188 vec_full_reg_offset(s, a->rn),
8189 vec_full_reg_offset(s, a->rm),
8190 vec_full_reg_offset(s, a->ra),
8191 status, vsz, vsz, 0, fn);
8192 tcg_temp_free_ptr(status);
8193 }
8194 return true;
8195}
8196
bfc9307e
RH
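/*
 * Signed saturating doubling multiply-add long: sel1 and sel2 pick
 * the bottom or top halves of the two multiplicands, so SQDMLALBT
 * below uses (false, true) to mix bottom and top.
 */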
8197static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
8198 bool sel1, bool sel2)
8199{
8200 static gen_helper_gvec_4 * const fns[] = {
8201 NULL, gen_helper_sve2_sqdmlal_zzzw_h,
8202 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
8203 };
8204 return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
8205}
8206
8207static bool do_sqdmlsl_zzzw(DisasContext *s, arg_rrrr_esz *a,
8208 bool sel1, bool sel2)
8209{
8210 static gen_helper_gvec_4 * const fns[] = {
8211 NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
8212 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
8213 };
8214 return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
8215}
8216
8217static bool trans_SQDMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8218{
8219 return do_sqdmlal_zzzw(s, a, false, false);
8220}
8221
8222static bool trans_SQDMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8223{
8224 return do_sqdmlal_zzzw(s, a, true, true);
8225}
8226
8227static bool trans_SQDMLALBT(DisasContext *s, arg_rrrr_esz *a)
8228{
8229 return do_sqdmlal_zzzw(s, a, false, true);
8230}
8231
8232static bool trans_SQDMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8233{
8234 return do_sqdmlsl_zzzw(s, a, false, false);
8235}
8236
8237static bool trans_SQDMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8238{
8239 return do_sqdmlsl_zzzw(s, a, true, true);
8240}
8241
8242static bool trans_SQDMLSLBT(DisasContext *s, arg_rrrr_esz *a)
8243{
8244 return do_sqdmlsl_zzzw(s, a, false, true);
8245}
ab3ddf31
RH
8246
8247static bool trans_SQRDMLAH_zzzz(DisasContext *s, arg_rrrr_esz *a)
8248{
8249 static gen_helper_gvec_4 * const fns[] = {
8250 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
8251 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
8252 };
8253 return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
8254}
8255
8256static bool trans_SQRDMLSH_zzzz(DisasContext *s, arg_rrrr_esz *a)
8257{
8258 static gen_helper_gvec_4 * const fns[] = {
8259 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
8260 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
8261 };
8262 return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
8263}
45a32e80
RH
8264
8265static bool do_smlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8266{
8267 static gen_helper_gvec_4 * const fns[] = {
8268 NULL, gen_helper_sve2_smlal_zzzw_h,
8269 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
8270 };
8271 return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
8272}
8273
8274static bool trans_SMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8275{
8276 return do_smlal_zzzw(s, a, false);
8277}
8278
8279static bool trans_SMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8280{
8281 return do_smlal_zzzw(s, a, true);
8282}
8283
8284static bool do_umlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8285{
8286 static gen_helper_gvec_4 * const fns[] = {
8287 NULL, gen_helper_sve2_umlal_zzzw_h,
8288 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
8289 };
8290 return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
8291}
8292
8293static bool trans_UMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8294{
8295 return do_umlal_zzzw(s, a, false);
8296}
8297
8298static bool trans_UMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8299{
8300 return do_umlal_zzzw(s, a, true);
8301}
8302
8303static bool do_smlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8304{
8305 static gen_helper_gvec_4 * const fns[] = {
8306 NULL, gen_helper_sve2_smlsl_zzzw_h,
8307 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
8308 };
8309 return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
8310}
8311
8312static bool trans_SMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8313{
8314 return do_smlsl_zzzw(s, a, false);
8315}
8316
8317static bool trans_SMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8318{
8319 return do_smlsl_zzzw(s, a, true);
8320}
8321
8322static bool do_umlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8323{
8324 static gen_helper_gvec_4 * const fns[] = {
8325 NULL, gen_helper_sve2_umlsl_zzzw_h,
8326 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
8327 };
8328 return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
8329}
8330
8331static bool trans_UMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8332{
8333 return do_umlsl_zzzw(s, a, false);
8334}
8335
8336static bool trans_UMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8337{
8338 return do_umlsl_zzzw(s, a, true);
8339}
d782d3ca
RH
8340
8341static bool trans_CMLA_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8342{
8343 static gen_helper_gvec_4 * const fns[] = {
8344 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
8345 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
8346 };
8347
8348 if (!dc_isar_feature(aa64_sve2, s)) {
8349 return false;
8350 }
8351 if (sve_access_check(s)) {
8352 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8353 }
8354 return true;
8355}
8356
21068f39
RH
8357static bool trans_CDOT_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8358{
8359 if (!dc_isar_feature(aa64_sve2, s) || a->esz < MO_32) {
8360 return false;
8361 }
8362 if (sve_access_check(s)) {
8363 gen_helper_gvec_4 *fn = (a->esz == MO_32
8364 ? gen_helper_sve2_cdot_zzzz_s
8365 : gen_helper_sve2_cdot_zzzz_d);
8366 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->rot);
8367 }
8368 return true;
8369}
8370
d782d3ca
RH
8371static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_SQRDCMLAH_zzzz *a)
8372{
8373 static gen_helper_gvec_4 * const fns[] = {
8374 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
8375 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
8376 };
8377
8378 if (!dc_isar_feature(aa64_sve2, s)) {
8379 return false;
8380 }
8381 if (sve_access_check(s)) {
8382 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8383 }
8384 return true;
8385}
6a98cb2a
RH
8386
8387static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a)
8388{
8389 if (a->esz != 2 || !dc_isar_feature(aa64_sve_i8mm, s)) {
8390 return false;
8391 }
8392 if (sve_access_check(s)) {
8393 unsigned vsz = vec_full_reg_size(s);
8394 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
8395 vec_full_reg_offset(s, a->rn),
8396 vec_full_reg_offset(s, a->rm),
8397 vec_full_reg_offset(s, a->ra),
8398 vsz, vsz, 0, gen_helper_gvec_usdot_b);
8399 }
8400 return true;
8401}
b2bcd1be
RH
8402
8403static bool trans_AESMC(DisasContext *s, arg_AESMC *a)
8404{
8405 if (!dc_isar_feature(aa64_sve2_aes, s)) {
8406 return false;
8407 }
8408 if (sve_access_check(s)) {
8409 gen_gvec_ool_zz(s, gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt);
8410 }
8411 return true;
8412}
3cc7a88e
RH
8413
8414static bool do_aese(DisasContext *s, arg_rrr_esz *a, bool decrypt)
8415{
8416 if (!dc_isar_feature(aa64_sve2_aes, s)) {
8417 return false;
8418 }
8419 if (sve_access_check(s)) {
8420 gen_gvec_ool_zzz(s, gen_helper_crypto_aese,
8421 a->rd, a->rn, a->rm, decrypt);
8422 }
8423 return true;
8424}
8425
8426static bool trans_AESE(DisasContext *s, arg_rrr_esz *a)
8427{
8428 return do_aese(s, a, false);
8429}
8430
8431static bool trans_AESD(DisasContext *s, arg_rrr_esz *a)
8432{
8433 return do_aese(s, a, true);
8434}
8435
8436static bool do_sm4(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
8437{
8438 if (!dc_isar_feature(aa64_sve2_sm4, s)) {
8439 return false;
8440 }
8441 if (sve_access_check(s)) {
8442 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
8443 }
8444 return true;
8445}
8446
8447static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a)
8448{
8449 return do_sm4(s, a, gen_helper_crypto_sm4e);
8450}
3358eb3f
RH
8451
8452static bool trans_SM4EKEY(DisasContext *s, arg_rrr_esz *a)
8453{
8454 return do_sm4(s, a, gen_helper_crypto_sm4ekey);
8455}
8456
8457static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
8458{
8459 if (!dc_isar_feature(aa64_sve2_sha3, s)) {
8460 return false;
8461 }
8462 if (sve_access_check(s)) {
8463 gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
8464 }
8465 return true;
8466}
5c1b7226
RH
8467
8468static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
8469{
8470 if (!dc_isar_feature(aa64_sve2, s)) {
8471 return false;
8472 }
8473 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
8474}
8475
d29b17ca
RH
8476static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
8477{
8478 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8479 return false;
8480 }
8481 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
8482}
8483
5c1b7226
RH
8484static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
8485{
8486 if (!dc_isar_feature(aa64_sve2, s)) {
8487 return false;
8488 }
8489 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
8490}
83c2523f
SL
8491
8492static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
8493{
8494 if (!dc_isar_feature(aa64_sve2, s)) {
8495 return false;
8496 }
8497 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
8498}
8499
8500static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
8501{
8502 if (!dc_isar_feature(aa64_sve2, s)) {
8503 return false;
8504 }
8505 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
8506}
95365277
SL
8507
8508static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
8509{
8510 if (!dc_isar_feature(aa64_sve2, s)) {
8511 return false;
8512 }
8513 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
8514}
8515
8516static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
8517{
8518 if (!dc_isar_feature(aa64_sve2, s)) {
8519 return false;
8520 }
8521 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
8522}
631be02e
SL
8523
8524static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
8525{
8526 static gen_helper_gvec_3_ptr * const fns[] = {
8527 NULL, gen_helper_flogb_h,
8528 gen_helper_flogb_s, gen_helper_flogb_d
8529 };
8530
8531 if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
8532 return false;
8533 }
8534 if (sve_access_check(s)) {
8535 TCGv_ptr status =
8536 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8537 unsigned vsz = vec_full_reg_size(s);
8538
8539 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
8540 vec_full_reg_offset(s, a->rn),
8541 pred_full_reg_offset(s, a->pg),
8542 status, vsz, vsz, 0, fns[a->esz]);
8543 tcg_temp_free_ptr(status);
8544 }
8545 return true;
8546}
50d102bd
SL
8547
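/*
 * FP16-to-FP32 widening multiply-add: 'sub' selects the FMLSL forms
 * and 'sel' the top halves; both are packed into the helper data as
 * (sel << 1) | sub.  The indexed variants below add a->index in the
 * upper bits.
 */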
8548static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
8549{
8550 if (!dc_isar_feature(aa64_sve2, s)) {
8551 return false;
8552 }
8553 if (sve_access_check(s)) {
8554 unsigned vsz = vec_full_reg_size(s);
8555 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8556 vec_full_reg_offset(s, a->rn),
8557 vec_full_reg_offset(s, a->rm),
8558 vec_full_reg_offset(s, a->ra),
8559 cpu_env, vsz, vsz, (sel << 1) | sub,
8560 gen_helper_sve2_fmlal_zzzw_s);
8561 }
8562 return true;
8563}
8564
8565static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8566{
8567 return do_FMLAL_zzzw(s, a, false, false);
8568}
8569
8570static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8571{
8572 return do_FMLAL_zzzw(s, a, false, true);
8573}
8574
8575static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8576{
8577 return do_FMLAL_zzzw(s, a, true, false);
8578}
8579
8580static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8581{
8582 return do_FMLAL_zzzw(s, a, true, true);
8583}
8584
8585static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
8586{
8587 if (!dc_isar_feature(aa64_sve2, s)) {
8588 return false;
8589 }
8590 if (sve_access_check(s)) {
8591 unsigned vsz = vec_full_reg_size(s);
8592 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8593 vec_full_reg_offset(s, a->rn),
8594 vec_full_reg_offset(s, a->rm),
8595 vec_full_reg_offset(s, a->ra),
8596 cpu_env, vsz, vsz,
8597 (a->index << 2) | (sel << 1) | sub,
8598 gen_helper_sve2_fmlal_zzxw_s);
8599 }
8600 return true;
8601}
8602
8603static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8604{
8605 return do_FMLAL_zzxw(s, a, false, false);
8606}
8607
8608static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8609{
8610 return do_FMLAL_zzxw(s, a, false, true);
8611}
8612
8613static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8614{
8615 return do_FMLAL_zzxw(s, a, true, false);
8616}
8617
8618static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8619{
8620 return do_FMLAL_zzxw(s, a, true, true);
8621}
2323c5ff
RH
8622
8623static bool do_i8mm_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
8624 gen_helper_gvec_4 *fn, int data)
8625{
8626 if (!dc_isar_feature(aa64_sve_i8mm, s)) {
8627 return false;
8628 }
8629 if (sve_access_check(s)) {
8630 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
8631 }
8632 return true;
8633}
8634
8635static bool trans_SMMLA(DisasContext *s, arg_rrrr_esz *a)
8636{
8637 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_smmla_b, 0);
8638}
8639
8640static bool trans_USMMLA(DisasContext *s, arg_rrrr_esz *a)
8641{
8642 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_usmmla_b, 0);
8643}
8644
8645static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a)
8646{
8647 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0);
8648}
cb8657f7
RH
8649
8650static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
8651{
8652 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8653 return false;
8654 }
8655 if (sve_access_check(s)) {
8656 gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot,
8657 a->rd, a->rn, a->rm, a->ra, 0);
8658 }
8659 return true;
8660}
83914478
RH
8661
8662static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
8663{
8664 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8665 return false;
8666 }
8667 if (sve_access_check(s)) {
8668 gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx,
8669 a->rd, a->rn, a->rm, a->ra, a->index);
8670 }
8671 return true;
8672}
81266a1f
RH
8673
8674static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
8675{
8676 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8677 return false;
8678 }
8679 if (sve_access_check(s)) {
8680 gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla,
8681 a->rd, a->rn, a->rm, a->ra, 0);
8682 }
8683 return true;
8684}
5693887f
RH
8685
8686static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8687{
8688 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8689 return false;
8690 }
8691 if (sve_access_check(s)) {
8692 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8693 unsigned vsz = vec_full_reg_size(s);
8694
8695 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8696 vec_full_reg_offset(s, a->rn),
8697 vec_full_reg_offset(s, a->rm),
8698 vec_full_reg_offset(s, a->ra),
8699 status, vsz, vsz, sel,
8700 gen_helper_gvec_bfmlal);
8701 tcg_temp_free_ptr(status);
8702 }
8703 return true;
8704}
8705
8706static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8707{
8708 return do_BFMLAL_zzzw(s, a, false);
8709}
8710
8711static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8712{
8713 return do_BFMLAL_zzzw(s, a, true);
8714}
458d0ab6
RH
8715
8716static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
8717{
8718 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8719 return false;
8720 }
8721 if (sve_access_check(s)) {
8722 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8723 unsigned vsz = vec_full_reg_size(s);
8724
8725 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8726 vec_full_reg_offset(s, a->rn),
8727 vec_full_reg_offset(s, a->rm),
8728 vec_full_reg_offset(s, a->ra),
8729 status, vsz, vsz, (a->index << 1) | sel,
8730 gen_helper_gvec_bfmlal_idx);
8731 tcg_temp_free_ptr(status);
8732 }
8733 return true;
8734}
8735
8736static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8737{
8738 return do_BFMLAL_zzxw(s, a, false);
8739}
8740
8741static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8742{
8743 return do_BFMLAL_zzxw(s, a, true);
8744}