]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Use tcg_constant in do_zzi_{sat, ool}, do_fp_imm
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
49/*
50 * Helpers for extracting complex instruction fields.
51 */
52
53/* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
451e4ffd 56static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
57{
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60}
61
451e4ffd 62static int tszimm_shr(DisasContext *s, int x)
ccd841c3 63{
451e4ffd 64 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
65}
66
67/* See e.g. LSL (immediate, predicated). */
451e4ffd 68static int tszimm_shl(DisasContext *s, int x)
ccd841c3 69{
451e4ffd 70 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
71}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
c4e7c493
RH
84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
86 */
451e4ffd 87static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
88{
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
91}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
103/* Return the offset info CPUARMState of the predicate vector register Pn.
104 * Note for this purpose, FFR is P16.
105 */
106static inline int pred_full_reg_offset(DisasContext *s, int regno)
107{
108 return offsetof(CPUARMState, vfp.pregs[regno]);
109}
110
111/* Return the byte size of the whole predicate register, VL / 64. */
112static inline int pred_full_reg_size(DisasContext *s)
113{
114 return s->sve_len >> 3;
115}
116
516e246a
RH
117/* Round up the size of a register to a size allowed by
118 * the tcg vector infrastructure. Any operation which uses this
119 * size may assume that the bits above pred_full_reg_size are zero,
120 * and must leave them the same way.
121 *
122 * Note that this is not needed for the vector registers as they
123 * are always properly sized for tcg vectors.
124 */
125static int size_for_gvec(int size)
126{
127 if (size <= 8) {
128 return 8;
129 } else {
130 return QEMU_ALIGN_UP(size, 16);
131 }
132}
133
134static int pred_gvec_reg_size(DisasContext *s)
135{
136 return size_for_gvec(pred_full_reg_size(s));
137}
138
40e32e5a
RH
139/* Invoke an out-of-line helper on 2 Zregs. */
140static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
141 int rd, int rn, int data)
142{
143 unsigned vsz = vec_full_reg_size(s);
144 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
145 vec_full_reg_offset(s, rn),
146 vsz, vsz, data, fn);
147}
148
e645d1a1
RH
149/* Invoke an out-of-line helper on 3 Zregs. */
150static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
151 int rd, int rn, int rm, int data)
152{
153 unsigned vsz = vec_full_reg_size(s);
154 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
155 vec_full_reg_offset(s, rn),
156 vec_full_reg_offset(s, rm),
157 vsz, vsz, data, fn);
158}
159
38650638
RH
160/* Invoke an out-of-line helper on 4 Zregs. */
161static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
162 int rd, int rn, int rm, int ra, int data)
163{
164 unsigned vsz = vec_full_reg_size(s);
165 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
166 vec_full_reg_offset(s, rn),
167 vec_full_reg_offset(s, rm),
168 vec_full_reg_offset(s, ra),
169 vsz, vsz, data, fn);
170}
171
96a461f7
RH
172/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
173static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
174 int rd, int rn, int pg, int data)
175{
176 unsigned vsz = vec_full_reg_size(s);
177 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
178 vec_full_reg_offset(s, rn),
179 pred_full_reg_offset(s, pg),
180 vsz, vsz, data, fn);
181}
182
36cbb7a8
RH
183/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
184static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
185 int rd, int rn, int rm, int pg, int data)
186{
187 unsigned vsz = vec_full_reg_size(s);
188 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
189 vec_full_reg_offset(s, rn),
190 vec_full_reg_offset(s, rm),
191 pred_full_reg_offset(s, pg),
192 vsz, vsz, data, fn);
193}
f7d79c41 194
36cbb7a8 195/* Invoke a vector expander on two Zregs. */
f7d79c41
RH
196static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
197 int esz, int rd, int rn)
38388f7e 198{
f7d79c41
RH
199 unsigned vsz = vec_full_reg_size(s);
200 gvec_fn(esz, vec_full_reg_offset(s, rd),
201 vec_full_reg_offset(s, rn), vsz, vsz);
38388f7e
RH
202}
203
39eea561 204/* Invoke a vector expander on three Zregs. */
28c4da31
RH
205static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
206 int esz, int rd, int rn, int rm)
38388f7e 207{
28c4da31
RH
208 unsigned vsz = vec_full_reg_size(s);
209 gvec_fn(esz, vec_full_reg_offset(s, rd),
210 vec_full_reg_offset(s, rn),
211 vec_full_reg_offset(s, rm), vsz, vsz);
38388f7e
RH
212}
213
911cdc6d
RH
214/* Invoke a vector expander on four Zregs. */
215static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
216 int esz, int rd, int rn, int rm, int ra)
217{
218 unsigned vsz = vec_full_reg_size(s);
219 gvec_fn(esz, vec_full_reg_offset(s, rd),
220 vec_full_reg_offset(s, rn),
221 vec_full_reg_offset(s, rm),
222 vec_full_reg_offset(s, ra), vsz, vsz);
223}
224
39eea561
RH
225/* Invoke a vector move on two Zregs. */
226static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 227{
f7d79c41
RH
228 if (sve_access_check(s)) {
229 gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
230 }
231 return true;
38388f7e
RH
232}
233
d9d78dcc
RH
234/* Initialize a Zreg with replications of a 64-bit immediate. */
235static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
236{
237 unsigned vsz = vec_full_reg_size(s);
8711e71f 238 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
d9d78dcc
RH
239}
240
516e246a 241/* Invoke a vector expander on three Pregs. */
dd81a8d7
RH
242static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
243 int rd, int rn, int rm)
516e246a 244{
dd81a8d7
RH
245 unsigned psz = pred_gvec_reg_size(s);
246 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
247 pred_full_reg_offset(s, rn),
248 pred_full_reg_offset(s, rm), psz, psz);
516e246a
RH
249}
250
251/* Invoke a vector move on two Pregs. */
252static bool do_mov_p(DisasContext *s, int rd, int rn)
253{
d0b2df5a
RH
254 if (sve_access_check(s)) {
255 unsigned psz = pred_gvec_reg_size(s);
256 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
257 pred_full_reg_offset(s, rn), psz, psz);
258 }
259 return true;
516e246a
RH
260}
261
9e18d7a6
RH
262/* Set the cpu flags as per a return from an SVE helper. */
263static void do_pred_flags(TCGv_i32 t)
264{
265 tcg_gen_mov_i32(cpu_NF, t);
266 tcg_gen_andi_i32(cpu_ZF, t, 2);
267 tcg_gen_andi_i32(cpu_CF, t, 1);
268 tcg_gen_movi_i32(cpu_VF, 0);
269}
270
271/* Subroutines computing the ARM PredTest psuedofunction. */
272static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
273{
274 TCGv_i32 t = tcg_temp_new_i32();
275
276 gen_helper_sve_predtest1(t, d, g);
277 do_pred_flags(t);
278 tcg_temp_free_i32(t);
279}
280
281static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
282{
283 TCGv_ptr dptr = tcg_temp_new_ptr();
284 TCGv_ptr gptr = tcg_temp_new_ptr();
285 TCGv_i32 t;
286
287 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
288 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
289 t = tcg_const_i32(words);
290
291 gen_helper_sve_predtest(t, dptr, gptr, t);
292 tcg_temp_free_ptr(dptr);
293 tcg_temp_free_ptr(gptr);
294
295 do_pred_flags(t);
296 tcg_temp_free_i32(t);
297}
298
028e2a7b
RH
299/* For each element size, the bits within a predicate word that are active. */
300const uint64_t pred_esz_masks[4] = {
301 0xffffffffffffffffull, 0x5555555555555555ull,
302 0x1111111111111111ull, 0x0101010101010101ull
303};
304
39eea561
RH
305/*
306 *** SVE Logical - Unpredicated Group
307 */
308
28c4da31
RH
309static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
310{
311 if (sve_access_check(s)) {
312 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
313 }
314 return true;
315}
316
3a7be554 317static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 318{
28c4da31 319 return do_zzz_fn(s, a, tcg_gen_gvec_and);
39eea561
RH
320}
321
3a7be554 322static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 323{
28c4da31 324 return do_zzz_fn(s, a, tcg_gen_gvec_or);
39eea561
RH
325}
326
3a7be554 327static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 328{
28c4da31 329 return do_zzz_fn(s, a, tcg_gen_gvec_xor);
39eea561
RH
330}
331
3a7be554 332static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
38388f7e 333{
28c4da31 334 return do_zzz_fn(s, a, tcg_gen_gvec_andc);
38388f7e 335}
d1822297 336
e6eba6e5
RH
337static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
338{
339 TCGv_i64 t = tcg_temp_new_i64();
340 uint64_t mask = dup_const(MO_8, 0xff >> sh);
341
342 tcg_gen_xor_i64(t, n, m);
343 tcg_gen_shri_i64(d, t, sh);
344 tcg_gen_shli_i64(t, t, 8 - sh);
345 tcg_gen_andi_i64(d, d, mask);
346 tcg_gen_andi_i64(t, t, ~mask);
347 tcg_gen_or_i64(d, d, t);
348 tcg_temp_free_i64(t);
349}
350
351static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
352{
353 TCGv_i64 t = tcg_temp_new_i64();
354 uint64_t mask = dup_const(MO_16, 0xffff >> sh);
355
356 tcg_gen_xor_i64(t, n, m);
357 tcg_gen_shri_i64(d, t, sh);
358 tcg_gen_shli_i64(t, t, 16 - sh);
359 tcg_gen_andi_i64(d, d, mask);
360 tcg_gen_andi_i64(t, t, ~mask);
361 tcg_gen_or_i64(d, d, t);
362 tcg_temp_free_i64(t);
363}
364
365static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
366{
367 tcg_gen_xor_i32(d, n, m);
368 tcg_gen_rotri_i32(d, d, sh);
369}
370
371static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
372{
373 tcg_gen_xor_i64(d, n, m);
374 tcg_gen_rotri_i64(d, d, sh);
375}
376
377static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
378 TCGv_vec m, int64_t sh)
379{
380 tcg_gen_xor_vec(vece, d, n, m);
381 tcg_gen_rotri_vec(vece, d, d, sh);
382}
383
384void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
385 uint32_t rm_ofs, int64_t shift,
386 uint32_t opr_sz, uint32_t max_sz)
387{
388 static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
389 static const GVecGen3i ops[4] = {
390 { .fni8 = gen_xar8_i64,
391 .fniv = gen_xar_vec,
392 .fno = gen_helper_sve2_xar_b,
393 .opt_opc = vecop,
394 .vece = MO_8 },
395 { .fni8 = gen_xar16_i64,
396 .fniv = gen_xar_vec,
397 .fno = gen_helper_sve2_xar_h,
398 .opt_opc = vecop,
399 .vece = MO_16 },
400 { .fni4 = gen_xar_i32,
401 .fniv = gen_xar_vec,
402 .fno = gen_helper_sve2_xar_s,
403 .opt_opc = vecop,
404 .vece = MO_32 },
405 { .fni8 = gen_xar_i64,
406 .fniv = gen_xar_vec,
407 .fno = gen_helper_gvec_xar_d,
408 .opt_opc = vecop,
409 .vece = MO_64 }
410 };
411 int esize = 8 << vece;
412
413 /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
414 tcg_debug_assert(shift >= 0);
415 tcg_debug_assert(shift <= esize);
416 shift &= esize - 1;
417
418 if (shift == 0) {
419 /* xar with no rotate devolves to xor. */
420 tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
421 } else {
422 tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
423 shift, &ops[vece]);
424 }
425}
426
427static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
428{
429 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
430 return false;
431 }
432 if (sve_access_check(s)) {
433 unsigned vsz = vec_full_reg_size(s);
434 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
435 vec_full_reg_offset(s, a->rn),
436 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
437 }
438 return true;
439}
440
911cdc6d
RH
441static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
442{
443 if (!dc_isar_feature(aa64_sve2, s)) {
444 return false;
445 }
446 if (sve_access_check(s)) {
447 gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
448 }
449 return true;
450}
451
452static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
453{
454 tcg_gen_xor_i64(d, n, m);
455 tcg_gen_xor_i64(d, d, k);
456}
457
458static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
459 TCGv_vec m, TCGv_vec k)
460{
461 tcg_gen_xor_vec(vece, d, n, m);
462 tcg_gen_xor_vec(vece, d, d, k);
463}
464
465static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
466 uint32_t a, uint32_t oprsz, uint32_t maxsz)
467{
468 static const GVecGen4 op = {
469 .fni8 = gen_eor3_i64,
470 .fniv = gen_eor3_vec,
471 .fno = gen_helper_sve2_eor3,
472 .vece = MO_64,
473 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
474 };
475 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
476}
477
478static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
479{
480 return do_sve2_zzzz_fn(s, a, gen_eor3);
481}
482
483static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
484{
485 tcg_gen_andc_i64(d, m, k);
486 tcg_gen_xor_i64(d, d, n);
487}
488
489static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
490 TCGv_vec m, TCGv_vec k)
491{
492 tcg_gen_andc_vec(vece, d, m, k);
493 tcg_gen_xor_vec(vece, d, d, n);
494}
495
496static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
497 uint32_t a, uint32_t oprsz, uint32_t maxsz)
498{
499 static const GVecGen4 op = {
500 .fni8 = gen_bcax_i64,
501 .fniv = gen_bcax_vec,
502 .fno = gen_helper_sve2_bcax,
503 .vece = MO_64,
504 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
505 };
506 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
507}
508
509static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
510{
511 return do_sve2_zzzz_fn(s, a, gen_bcax);
512}
513
514static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
515 uint32_t a, uint32_t oprsz, uint32_t maxsz)
516{
517 /* BSL differs from the generic bitsel in argument ordering. */
518 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
519}
520
521static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
522{
523 return do_sve2_zzzz_fn(s, a, gen_bsl);
524}
525
526static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
527{
528 tcg_gen_andc_i64(n, k, n);
529 tcg_gen_andc_i64(m, m, k);
530 tcg_gen_or_i64(d, n, m);
531}
532
533static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
534 TCGv_vec m, TCGv_vec k)
535{
536 if (TCG_TARGET_HAS_bitsel_vec) {
537 tcg_gen_not_vec(vece, n, n);
538 tcg_gen_bitsel_vec(vece, d, k, n, m);
539 } else {
540 tcg_gen_andc_vec(vece, n, k, n);
541 tcg_gen_andc_vec(vece, m, m, k);
542 tcg_gen_or_vec(vece, d, n, m);
543 }
544}
545
546static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
547 uint32_t a, uint32_t oprsz, uint32_t maxsz)
548{
549 static const GVecGen4 op = {
550 .fni8 = gen_bsl1n_i64,
551 .fniv = gen_bsl1n_vec,
552 .fno = gen_helper_sve2_bsl1n,
553 .vece = MO_64,
554 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
555 };
556 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
557}
558
559static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
560{
561 return do_sve2_zzzz_fn(s, a, gen_bsl1n);
562}
563
564static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
565{
566 /*
567 * Z[dn] = (n & k) | (~m & ~k)
568 * = | ~(m | k)
569 */
570 tcg_gen_and_i64(n, n, k);
571 if (TCG_TARGET_HAS_orc_i64) {
572 tcg_gen_or_i64(m, m, k);
573 tcg_gen_orc_i64(d, n, m);
574 } else {
575 tcg_gen_nor_i64(m, m, k);
576 tcg_gen_or_i64(d, n, m);
577 }
578}
579
580static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
581 TCGv_vec m, TCGv_vec k)
582{
583 if (TCG_TARGET_HAS_bitsel_vec) {
584 tcg_gen_not_vec(vece, m, m);
585 tcg_gen_bitsel_vec(vece, d, k, n, m);
586 } else {
587 tcg_gen_and_vec(vece, n, n, k);
588 tcg_gen_or_vec(vece, m, m, k);
589 tcg_gen_orc_vec(vece, d, n, m);
590 }
591}
592
593static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
594 uint32_t a, uint32_t oprsz, uint32_t maxsz)
595{
596 static const GVecGen4 op = {
597 .fni8 = gen_bsl2n_i64,
598 .fniv = gen_bsl2n_vec,
599 .fno = gen_helper_sve2_bsl2n,
600 .vece = MO_64,
601 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
602 };
603 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
604}
605
606static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
607{
608 return do_sve2_zzzz_fn(s, a, gen_bsl2n);
609}
610
611static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
612{
613 tcg_gen_and_i64(n, n, k);
614 tcg_gen_andc_i64(m, m, k);
615 tcg_gen_nor_i64(d, n, m);
616}
617
618static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
619 TCGv_vec m, TCGv_vec k)
620{
621 tcg_gen_bitsel_vec(vece, d, k, n, m);
622 tcg_gen_not_vec(vece, d, d);
623}
624
625static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
626 uint32_t a, uint32_t oprsz, uint32_t maxsz)
627{
628 static const GVecGen4 op = {
629 .fni8 = gen_nbsl_i64,
630 .fniv = gen_nbsl_vec,
631 .fno = gen_helper_sve2_nbsl,
632 .vece = MO_64,
633 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
634 };
635 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
636}
637
638static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
639{
640 return do_sve2_zzzz_fn(s, a, gen_nbsl);
641}
642
fea98f9c
RH
643/*
644 *** SVE Integer Arithmetic - Unpredicated Group
645 */
646
3a7be554 647static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 648{
28c4da31 649 return do_zzz_fn(s, a, tcg_gen_gvec_add);
fea98f9c
RH
650}
651
3a7be554 652static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 653{
28c4da31 654 return do_zzz_fn(s, a, tcg_gen_gvec_sub);
fea98f9c
RH
655}
656
3a7be554 657static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 658{
28c4da31 659 return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
fea98f9c
RH
660}
661
3a7be554 662static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 663{
28c4da31 664 return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
fea98f9c
RH
665}
666
3a7be554 667static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 668{
28c4da31 669 return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
fea98f9c
RH
670}
671
3a7be554 672static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 673{
28c4da31 674 return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
fea98f9c
RH
675}
676
f97cfd59
RH
677/*
678 *** SVE Integer Arithmetic - Binary Predicated Group
679 */
680
681static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
682{
f97cfd59
RH
683 if (fn == NULL) {
684 return false;
685 }
686 if (sve_access_check(s)) {
36cbb7a8 687 gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
f97cfd59
RH
688 }
689 return true;
690}
691
a2103582
RH
692/* Select active elememnts from Zn and inactive elements from Zm,
693 * storing the result in Zd.
694 */
695static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
696{
697 static gen_helper_gvec_4 * const fns[4] = {
698 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
699 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
700 };
36cbb7a8 701 gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
a2103582
RH
702}
703
f97cfd59 704#define DO_ZPZZ(NAME, name) \
3a7be554 705static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
f97cfd59
RH
706{ \
707 static gen_helper_gvec_4 * const fns[4] = { \
708 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
709 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
710 }; \
711 return do_zpzz_ool(s, a, fns[a->esz]); \
712}
713
714DO_ZPZZ(AND, and)
715DO_ZPZZ(EOR, eor)
716DO_ZPZZ(ORR, orr)
717DO_ZPZZ(BIC, bic)
718
719DO_ZPZZ(ADD, add)
720DO_ZPZZ(SUB, sub)
721
722DO_ZPZZ(SMAX, smax)
723DO_ZPZZ(UMAX, umax)
724DO_ZPZZ(SMIN, smin)
725DO_ZPZZ(UMIN, umin)
726DO_ZPZZ(SABD, sabd)
727DO_ZPZZ(UABD, uabd)
728
729DO_ZPZZ(MUL, mul)
730DO_ZPZZ(SMULH, smulh)
731DO_ZPZZ(UMULH, umulh)
732
27721dbb
RH
733DO_ZPZZ(ASR, asr)
734DO_ZPZZ(LSR, lsr)
735DO_ZPZZ(LSL, lsl)
736
3a7be554 737static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
f97cfd59
RH
738{
739 static gen_helper_gvec_4 * const fns[4] = {
740 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
741 };
742 return do_zpzz_ool(s, a, fns[a->esz]);
743}
744
3a7be554 745static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
f97cfd59
RH
746{
747 static gen_helper_gvec_4 * const fns[4] = {
748 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
749 };
750 return do_zpzz_ool(s, a, fns[a->esz]);
751}
752
3a7be554 753static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
a2103582
RH
754{
755 if (sve_access_check(s)) {
756 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
757 }
758 return true;
759}
d3fe4a29 760
f97cfd59
RH
761#undef DO_ZPZZ
762
afac6d04
RH
763/*
764 *** SVE Integer Arithmetic - Unary Predicated Group
765 */
766
767static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
768{
769 if (fn == NULL) {
770 return false;
771 }
772 if (sve_access_check(s)) {
96a461f7 773 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
afac6d04
RH
774 }
775 return true;
776}
777
778#define DO_ZPZ(NAME, name) \
3a7be554 779static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
afac6d04
RH
780{ \
781 static gen_helper_gvec_3 * const fns[4] = { \
782 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
783 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
784 }; \
785 return do_zpz_ool(s, a, fns[a->esz]); \
786}
787
788DO_ZPZ(CLS, cls)
789DO_ZPZ(CLZ, clz)
790DO_ZPZ(CNT_zpz, cnt_zpz)
791DO_ZPZ(CNOT, cnot)
792DO_ZPZ(NOT_zpz, not_zpz)
793DO_ZPZ(ABS, abs)
794DO_ZPZ(NEG, neg)
795
3a7be554 796static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
797{
798 static gen_helper_gvec_3 * const fns[4] = {
799 NULL,
800 gen_helper_sve_fabs_h,
801 gen_helper_sve_fabs_s,
802 gen_helper_sve_fabs_d
803 };
804 return do_zpz_ool(s, a, fns[a->esz]);
805}
806
3a7be554 807static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
808{
809 static gen_helper_gvec_3 * const fns[4] = {
810 NULL,
811 gen_helper_sve_fneg_h,
812 gen_helper_sve_fneg_s,
813 gen_helper_sve_fneg_d
814 };
815 return do_zpz_ool(s, a, fns[a->esz]);
816}
817
3a7be554 818static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
819{
820 static gen_helper_gvec_3 * const fns[4] = {
821 NULL,
822 gen_helper_sve_sxtb_h,
823 gen_helper_sve_sxtb_s,
824 gen_helper_sve_sxtb_d
825 };
826 return do_zpz_ool(s, a, fns[a->esz]);
827}
828
3a7be554 829static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
830{
831 static gen_helper_gvec_3 * const fns[4] = {
832 NULL,
833 gen_helper_sve_uxtb_h,
834 gen_helper_sve_uxtb_s,
835 gen_helper_sve_uxtb_d
836 };
837 return do_zpz_ool(s, a, fns[a->esz]);
838}
839
3a7be554 840static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
841{
842 static gen_helper_gvec_3 * const fns[4] = {
843 NULL, NULL,
844 gen_helper_sve_sxth_s,
845 gen_helper_sve_sxth_d
846 };
847 return do_zpz_ool(s, a, fns[a->esz]);
848}
849
3a7be554 850static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
851{
852 static gen_helper_gvec_3 * const fns[4] = {
853 NULL, NULL,
854 gen_helper_sve_uxth_s,
855 gen_helper_sve_uxth_d
856 };
857 return do_zpz_ool(s, a, fns[a->esz]);
858}
859
3a7be554 860static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
861{
862 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
863}
864
3a7be554 865static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
866{
867 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
868}
869
870#undef DO_ZPZ
871
047cec97
RH
872/*
873 *** SVE Integer Reduction Group
874 */
875
876typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
877static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
878 gen_helper_gvec_reduc *fn)
879{
880 unsigned vsz = vec_full_reg_size(s);
881 TCGv_ptr t_zn, t_pg;
882 TCGv_i32 desc;
883 TCGv_i64 temp;
884
885 if (fn == NULL) {
886 return false;
887 }
888 if (!sve_access_check(s)) {
889 return true;
890 }
891
892 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
893 temp = tcg_temp_new_i64();
894 t_zn = tcg_temp_new_ptr();
895 t_pg = tcg_temp_new_ptr();
896
897 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
898 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
899 fn(temp, t_zn, t_pg, desc);
900 tcg_temp_free_ptr(t_zn);
901 tcg_temp_free_ptr(t_pg);
902 tcg_temp_free_i32(desc);
903
904 write_fp_dreg(s, a->rd, temp);
905 tcg_temp_free_i64(temp);
906 return true;
907}
908
909#define DO_VPZ(NAME, name) \
3a7be554 910static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
047cec97
RH
911{ \
912 static gen_helper_gvec_reduc * const fns[4] = { \
913 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
914 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
915 }; \
916 return do_vpz_ool(s, a, fns[a->esz]); \
917}
918
919DO_VPZ(ORV, orv)
920DO_VPZ(ANDV, andv)
921DO_VPZ(EORV, eorv)
922
923DO_VPZ(UADDV, uaddv)
924DO_VPZ(SMAXV, smaxv)
925DO_VPZ(UMAXV, umaxv)
926DO_VPZ(SMINV, sminv)
927DO_VPZ(UMINV, uminv)
928
3a7be554 929static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
047cec97
RH
930{
931 static gen_helper_gvec_reduc * const fns[4] = {
932 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
933 gen_helper_sve_saddv_s, NULL
934 };
935 return do_vpz_ool(s, a, fns[a->esz]);
936}
937
938#undef DO_VPZ
939
ccd841c3
RH
940/*
941 *** SVE Shift by Immediate - Predicated Group
942 */
943
60245996
RH
944/*
945 * Copy Zn into Zd, storing zeros into inactive elements.
946 * If invert, store zeros into the active elements.
ccd841c3 947 */
60245996
RH
948static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
949 int esz, bool invert)
ccd841c3 950{
60245996
RH
951 static gen_helper_gvec_3 * const fns[4] = {
952 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
953 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
ccd841c3 954 };
60245996 955
ccd841c3 956 if (sve_access_check(s)) {
96a461f7 957 gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
ccd841c3
RH
958 }
959 return true;
960}
961
962static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
963 gen_helper_gvec_3 *fn)
964{
965 if (sve_access_check(s)) {
96a461f7 966 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
ccd841c3
RH
967 }
968 return true;
969}
970
3a7be554 971static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
972{
973 static gen_helper_gvec_3 * const fns[4] = {
974 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
975 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
976 };
977 if (a->esz < 0) {
978 /* Invalid tsz encoding -- see tszimm_esz. */
979 return false;
980 }
981 /* Shift by element size is architecturally valid. For
982 arithmetic right-shift, it's the same as by one less. */
983 a->imm = MIN(a->imm, (8 << a->esz) - 1);
984 return do_zpzi_ool(s, a, fns[a->esz]);
985}
986
3a7be554 987static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
988{
989 static gen_helper_gvec_3 * const fns[4] = {
990 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
991 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
992 };
993 if (a->esz < 0) {
994 return false;
995 }
996 /* Shift by element size is architecturally valid.
997 For logical shifts, it is a zeroing operation. */
998 if (a->imm >= (8 << a->esz)) {
60245996 999 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
1000 } else {
1001 return do_zpzi_ool(s, a, fns[a->esz]);
1002 }
1003}
1004
3a7be554 1005static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
1006{
1007 static gen_helper_gvec_3 * const fns[4] = {
1008 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
1009 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
1010 };
1011 if (a->esz < 0) {
1012 return false;
1013 }
1014 /* Shift by element size is architecturally valid.
1015 For logical shifts, it is a zeroing operation. */
1016 if (a->imm >= (8 << a->esz)) {
60245996 1017 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
1018 } else {
1019 return do_zpzi_ool(s, a, fns[a->esz]);
1020 }
1021}
1022
3a7be554 1023static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
1024{
1025 static gen_helper_gvec_3 * const fns[4] = {
1026 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
1027 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
1028 };
1029 if (a->esz < 0) {
1030 return false;
1031 }
1032 /* Shift by element size is architecturally valid. For arithmetic
1033 right shift for division, it is a zeroing operation. */
1034 if (a->imm >= (8 << a->esz)) {
60245996 1035 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
1036 } else {
1037 return do_zpzi_ool(s, a, fns[a->esz]);
1038 }
1039}
1040
a5421b54
SL
1041static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1042{
1043 static gen_helper_gvec_3 * const fns[4] = {
1044 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
1045 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
1046 };
1047 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1048 return false;
1049 }
1050 return do_zpzi_ool(s, a, fns[a->esz]);
1051}
1052
1053static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1054{
1055 static gen_helper_gvec_3 * const fns[4] = {
1056 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
1057 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
1058 };
1059 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1060 return false;
1061 }
1062 return do_zpzi_ool(s, a, fns[a->esz]);
1063}
1064
1065static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a)
1066{
1067 static gen_helper_gvec_3 * const fns[4] = {
1068 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
1069 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
1070 };
1071 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1072 return false;
1073 }
1074 return do_zpzi_ool(s, a, fns[a->esz]);
1075}
1076
1077static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a)
1078{
1079 static gen_helper_gvec_3 * const fns[4] = {
1080 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
1081 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
1082 };
1083 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1084 return false;
1085 }
1086 return do_zpzi_ool(s, a, fns[a->esz]);
1087}
1088
1089static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a)
1090{
1091 static gen_helper_gvec_3 * const fns[4] = {
1092 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
1093 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
1094 };
1095 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1096 return false;
1097 }
1098 return do_zpzi_ool(s, a, fns[a->esz]);
1099}
1100
fe7f8dfb
RH
1101/*
1102 *** SVE Bitwise Shift - Predicated Group
1103 */
1104
1105#define DO_ZPZW(NAME, name) \
3a7be554 1106static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
fe7f8dfb
RH
1107{ \
1108 static gen_helper_gvec_4 * const fns[3] = { \
1109 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
1110 gen_helper_sve_##name##_zpzw_s, \
1111 }; \
1112 if (a->esz < 0 || a->esz >= 3) { \
1113 return false; \
1114 } \
1115 return do_zpzz_ool(s, a, fns[a->esz]); \
1116}
1117
1118DO_ZPZW(ASR, asr)
1119DO_ZPZW(LSR, lsr)
1120DO_ZPZW(LSL, lsl)
1121
1122#undef DO_ZPZW
1123
d9d78dcc
RH
1124/*
1125 *** SVE Bitwise Shift - Unpredicated Group
1126 */
1127
1128static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
1129 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
1130 int64_t, uint32_t, uint32_t))
1131{
1132 if (a->esz < 0) {
1133 /* Invalid tsz encoding -- see tszimm_esz. */
1134 return false;
1135 }
1136 if (sve_access_check(s)) {
1137 unsigned vsz = vec_full_reg_size(s);
1138 /* Shift by element size is architecturally valid. For
1139 arithmetic right-shift, it's the same as by one less.
1140 Otherwise it is a zeroing operation. */
1141 if (a->imm >= 8 << a->esz) {
1142 if (asr) {
1143 a->imm = (8 << a->esz) - 1;
1144 } else {
1145 do_dupi_z(s, a->rd, 0);
1146 return true;
1147 }
1148 }
1149 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
1150 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
1151 }
1152 return true;
1153}
1154
3a7be554 1155static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
1156{
1157 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
1158}
1159
3a7be554 1160static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
1161{
1162 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
1163}
1164
3a7be554 1165static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
1166{
1167 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
1168}
1169
1170static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
1171{
1172 if (fn == NULL) {
1173 return false;
1174 }
1175 if (sve_access_check(s)) {
e645d1a1 1176 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
d9d78dcc
RH
1177 }
1178 return true;
1179}
1180
1181#define DO_ZZW(NAME, name) \
3a7be554 1182static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
d9d78dcc
RH
1183{ \
1184 static gen_helper_gvec_3 * const fns[4] = { \
1185 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
1186 gen_helper_sve_##name##_zzw_s, NULL \
1187 }; \
1188 return do_zzw_ool(s, a, fns[a->esz]); \
1189}
1190
1191DO_ZZW(ASR, asr)
1192DO_ZZW(LSR, lsr)
1193DO_ZZW(LSL, lsl)
1194
1195#undef DO_ZZW
1196
96a36e4a
RH
1197/*
1198 *** SVE Integer Multiply-Add Group
1199 */
1200
1201static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1202 gen_helper_gvec_5 *fn)
1203{
1204 if (sve_access_check(s)) {
1205 unsigned vsz = vec_full_reg_size(s);
1206 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1207 vec_full_reg_offset(s, a->ra),
1208 vec_full_reg_offset(s, a->rn),
1209 vec_full_reg_offset(s, a->rm),
1210 pred_full_reg_offset(s, a->pg),
1211 vsz, vsz, 0, fn);
1212 }
1213 return true;
1214}
1215
1216#define DO_ZPZZZ(NAME, name) \
3a7be554 1217static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
96a36e4a
RH
1218{ \
1219 static gen_helper_gvec_5 * const fns[4] = { \
1220 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
1221 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
1222 }; \
1223 return do_zpzzz_ool(s, a, fns[a->esz]); \
1224}
1225
1226DO_ZPZZZ(MLA, mla)
1227DO_ZPZZZ(MLS, mls)
1228
1229#undef DO_ZPZZZ
1230
9a56c9c3
RH
1231/*
1232 *** SVE Index Generation Group
1233 */
1234
1235static void do_index(DisasContext *s, int esz, int rd,
1236 TCGv_i64 start, TCGv_i64 incr)
1237{
1238 unsigned vsz = vec_full_reg_size(s);
1239 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1240 TCGv_ptr t_zd = tcg_temp_new_ptr();
1241
1242 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1243 if (esz == 3) {
1244 gen_helper_sve_index_d(t_zd, start, incr, desc);
1245 } else {
1246 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
1247 static index_fn * const fns[3] = {
1248 gen_helper_sve_index_b,
1249 gen_helper_sve_index_h,
1250 gen_helper_sve_index_s,
1251 };
1252 TCGv_i32 s32 = tcg_temp_new_i32();
1253 TCGv_i32 i32 = tcg_temp_new_i32();
1254
1255 tcg_gen_extrl_i64_i32(s32, start);
1256 tcg_gen_extrl_i64_i32(i32, incr);
1257 fns[esz](t_zd, s32, i32, desc);
1258
1259 tcg_temp_free_i32(s32);
1260 tcg_temp_free_i32(i32);
1261 }
1262 tcg_temp_free_ptr(t_zd);
1263 tcg_temp_free_i32(desc);
1264}
1265
3a7be554 1266static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
9a56c9c3
RH
1267{
1268 if (sve_access_check(s)) {
b0c3aece
RH
1269 TCGv_i64 start = tcg_constant_i64(a->imm1);
1270 TCGv_i64 incr = tcg_constant_i64(a->imm2);
9a56c9c3 1271 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1272 }
1273 return true;
1274}
1275
3a7be554 1276static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
9a56c9c3
RH
1277{
1278 if (sve_access_check(s)) {
b0c3aece 1279 TCGv_i64 start = tcg_constant_i64(a->imm);
9a56c9c3
RH
1280 TCGv_i64 incr = cpu_reg(s, a->rm);
1281 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1282 }
1283 return true;
1284}
1285
3a7be554 1286static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
9a56c9c3
RH
1287{
1288 if (sve_access_check(s)) {
1289 TCGv_i64 start = cpu_reg(s, a->rn);
b0c3aece 1290 TCGv_i64 incr = tcg_constant_i64(a->imm);
9a56c9c3 1291 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1292 }
1293 return true;
1294}
1295
3a7be554 1296static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
9a56c9c3
RH
1297{
1298 if (sve_access_check(s)) {
1299 TCGv_i64 start = cpu_reg(s, a->rn);
1300 TCGv_i64 incr = cpu_reg(s, a->rm);
1301 do_index(s, a->esz, a->rd, start, incr);
1302 }
1303 return true;
1304}
1305
96f922cc
RH
1306/*
1307 *** SVE Stack Allocation Group
1308 */
1309
3a7be554 1310static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1311{
5de56742
AC
1312 if (sve_access_check(s)) {
1313 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1314 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1315 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1316 }
96f922cc
RH
1317 return true;
1318}
1319
3a7be554 1320static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1321{
5de56742
AC
1322 if (sve_access_check(s)) {
1323 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1324 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1325 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1326 }
96f922cc
RH
1327 return true;
1328}
1329
3a7be554 1330static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1331{
5de56742
AC
1332 if (sve_access_check(s)) {
1333 TCGv_i64 reg = cpu_reg(s, a->rd);
1334 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1335 }
96f922cc
RH
1336 return true;
1337}
1338
4b242d9c
RH
1339/*
1340 *** SVE Compute Vector Address Group
1341 */
1342
1343static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
1344{
1345 if (sve_access_check(s)) {
e645d1a1 1346 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
4b242d9c
RH
1347 }
1348 return true;
1349}
1350
3a7be554 1351static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1352{
1353 return do_adr(s, a, gen_helper_sve_adr_p32);
1354}
1355
3a7be554 1356static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1357{
1358 return do_adr(s, a, gen_helper_sve_adr_p64);
1359}
1360
3a7be554 1361static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1362{
1363 return do_adr(s, a, gen_helper_sve_adr_s32);
1364}
1365
3a7be554 1366static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1367{
1368 return do_adr(s, a, gen_helper_sve_adr_u32);
1369}
1370
0762cd42
RH
1371/*
1372 *** SVE Integer Misc - Unpredicated Group
1373 */
1374
3a7be554 1375static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
0762cd42
RH
1376{
1377 static gen_helper_gvec_2 * const fns[4] = {
1378 NULL,
1379 gen_helper_sve_fexpa_h,
1380 gen_helper_sve_fexpa_s,
1381 gen_helper_sve_fexpa_d,
1382 };
1383 if (a->esz == 0) {
1384 return false;
1385 }
1386 if (sve_access_check(s)) {
40e32e5a 1387 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
0762cd42
RH
1388 }
1389 return true;
1390}
1391
3a7be554 1392static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
a1f233f2
RH
1393{
1394 static gen_helper_gvec_3 * const fns[4] = {
1395 NULL,
1396 gen_helper_sve_ftssel_h,
1397 gen_helper_sve_ftssel_s,
1398 gen_helper_sve_ftssel_d,
1399 };
1400 if (a->esz == 0) {
1401 return false;
1402 }
1403 if (sve_access_check(s)) {
e645d1a1 1404 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
a1f233f2
RH
1405 }
1406 return true;
1407}
1408
516e246a
RH
1409/*
1410 *** SVE Predicate Logical Operations Group
1411 */
1412
1413static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1414 const GVecGen4 *gvec_op)
1415{
1416 if (!sve_access_check(s)) {
1417 return true;
1418 }
1419
1420 unsigned psz = pred_gvec_reg_size(s);
1421 int dofs = pred_full_reg_offset(s, a->rd);
1422 int nofs = pred_full_reg_offset(s, a->rn);
1423 int mofs = pred_full_reg_offset(s, a->rm);
1424 int gofs = pred_full_reg_offset(s, a->pg);
1425
dd81a8d7
RH
1426 if (!a->s) {
1427 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1428 return true;
1429 }
1430
516e246a
RH
1431 if (psz == 8) {
1432 /* Do the operation and the flags generation in temps. */
1433 TCGv_i64 pd = tcg_temp_new_i64();
1434 TCGv_i64 pn = tcg_temp_new_i64();
1435 TCGv_i64 pm = tcg_temp_new_i64();
1436 TCGv_i64 pg = tcg_temp_new_i64();
1437
1438 tcg_gen_ld_i64(pn, cpu_env, nofs);
1439 tcg_gen_ld_i64(pm, cpu_env, mofs);
1440 tcg_gen_ld_i64(pg, cpu_env, gofs);
1441
1442 gvec_op->fni8(pd, pn, pm, pg);
1443 tcg_gen_st_i64(pd, cpu_env, dofs);
1444
1445 do_predtest1(pd, pg);
1446
1447 tcg_temp_free_i64(pd);
1448 tcg_temp_free_i64(pn);
1449 tcg_temp_free_i64(pm);
1450 tcg_temp_free_i64(pg);
1451 } else {
1452 /* The operation and flags generation is large. The computation
1453 * of the flags depends on the original contents of the guarding
1454 * predicate. If the destination overwrites the guarding predicate,
1455 * then the easiest way to get this right is to save a copy.
1456 */
1457 int tofs = gofs;
1458 if (a->rd == a->pg) {
1459 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1460 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1461 }
1462
1463 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1464 do_predtest(s, dofs, tofs, psz / 8);
1465 }
1466 return true;
1467}
1468
1469static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1470{
1471 tcg_gen_and_i64(pd, pn, pm);
1472 tcg_gen_and_i64(pd, pd, pg);
1473}
1474
1475static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1476 TCGv_vec pm, TCGv_vec pg)
1477{
1478 tcg_gen_and_vec(vece, pd, pn, pm);
1479 tcg_gen_and_vec(vece, pd, pd, pg);
1480}
1481
3a7be554 1482static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1483{
1484 static const GVecGen4 op = {
1485 .fni8 = gen_and_pg_i64,
1486 .fniv = gen_and_pg_vec,
1487 .fno = gen_helper_sve_and_pppp,
1488 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1489 };
dd81a8d7
RH
1490
1491 if (!a->s) {
1492 if (!sve_access_check(s)) {
1493 return true;
1494 }
1495 if (a->rn == a->rm) {
1496 if (a->pg == a->rn) {
1497 do_mov_p(s, a->rd, a->rn);
1498 } else {
1499 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1500 }
1501 return true;
1502 } else if (a->pg == a->rn || a->pg == a->rm) {
1503 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1504 return true;
516e246a 1505 }
516e246a 1506 }
dd81a8d7 1507 return do_pppp_flags(s, a, &op);
516e246a
RH
1508}
1509
1510static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1511{
1512 tcg_gen_andc_i64(pd, pn, pm);
1513 tcg_gen_and_i64(pd, pd, pg);
1514}
1515
1516static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1517 TCGv_vec pm, TCGv_vec pg)
1518{
1519 tcg_gen_andc_vec(vece, pd, pn, pm);
1520 tcg_gen_and_vec(vece, pd, pd, pg);
1521}
1522
3a7be554 1523static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1524{
1525 static const GVecGen4 op = {
1526 .fni8 = gen_bic_pg_i64,
1527 .fniv = gen_bic_pg_vec,
1528 .fno = gen_helper_sve_bic_pppp,
1529 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1530 };
dd81a8d7
RH
1531
1532 if (!a->s && a->pg == a->rn) {
1533 if (sve_access_check(s)) {
1534 gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1535 }
1536 return true;
516e246a 1537 }
dd81a8d7 1538 return do_pppp_flags(s, a, &op);
516e246a
RH
1539}
1540
1541static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1542{
1543 tcg_gen_xor_i64(pd, pn, pm);
1544 tcg_gen_and_i64(pd, pd, pg);
1545}
1546
1547static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1548 TCGv_vec pm, TCGv_vec pg)
1549{
1550 tcg_gen_xor_vec(vece, pd, pn, pm);
1551 tcg_gen_and_vec(vece, pd, pd, pg);
1552}
1553
3a7be554 1554static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1555{
1556 static const GVecGen4 op = {
1557 .fni8 = gen_eor_pg_i64,
1558 .fniv = gen_eor_pg_vec,
1559 .fno = gen_helper_sve_eor_pppp,
1560 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1561 };
dd81a8d7 1562 return do_pppp_flags(s, a, &op);
516e246a
RH
1563}
1564
3a7be554 1565static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1566{
516e246a
RH
1567 if (a->s) {
1568 return false;
516e246a 1569 }
d4bc6232
RH
1570 if (sve_access_check(s)) {
1571 unsigned psz = pred_gvec_reg_size(s);
1572 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1573 pred_full_reg_offset(s, a->pg),
1574 pred_full_reg_offset(s, a->rn),
1575 pred_full_reg_offset(s, a->rm), psz, psz);
1576 }
1577 return true;
516e246a
RH
1578}
1579
1580static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1581{
1582 tcg_gen_or_i64(pd, pn, pm);
1583 tcg_gen_and_i64(pd, pd, pg);
1584}
1585
1586static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1587 TCGv_vec pm, TCGv_vec pg)
1588{
1589 tcg_gen_or_vec(vece, pd, pn, pm);
1590 tcg_gen_and_vec(vece, pd, pd, pg);
1591}
1592
3a7be554 1593static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1594{
1595 static const GVecGen4 op = {
1596 .fni8 = gen_orr_pg_i64,
1597 .fniv = gen_orr_pg_vec,
1598 .fno = gen_helper_sve_orr_pppp,
1599 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1600 };
dd81a8d7
RH
1601
1602 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
516e246a 1603 return do_mov_p(s, a->rd, a->rn);
516e246a 1604 }
dd81a8d7 1605 return do_pppp_flags(s, a, &op);
516e246a
RH
1606}
1607
1608static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1609{
1610 tcg_gen_orc_i64(pd, pn, pm);
1611 tcg_gen_and_i64(pd, pd, pg);
1612}
1613
1614static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1615 TCGv_vec pm, TCGv_vec pg)
1616{
1617 tcg_gen_orc_vec(vece, pd, pn, pm);
1618 tcg_gen_and_vec(vece, pd, pd, pg);
1619}
1620
3a7be554 1621static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1622{
1623 static const GVecGen4 op = {
1624 .fni8 = gen_orn_pg_i64,
1625 .fniv = gen_orn_pg_vec,
1626 .fno = gen_helper_sve_orn_pppp,
1627 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1628 };
dd81a8d7 1629 return do_pppp_flags(s, a, &op);
516e246a
RH
1630}
1631
1632static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1633{
1634 tcg_gen_or_i64(pd, pn, pm);
1635 tcg_gen_andc_i64(pd, pg, pd);
1636}
1637
1638static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1639 TCGv_vec pm, TCGv_vec pg)
1640{
1641 tcg_gen_or_vec(vece, pd, pn, pm);
1642 tcg_gen_andc_vec(vece, pd, pg, pd);
1643}
1644
3a7be554 1645static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1646{
1647 static const GVecGen4 op = {
1648 .fni8 = gen_nor_pg_i64,
1649 .fniv = gen_nor_pg_vec,
1650 .fno = gen_helper_sve_nor_pppp,
1651 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1652 };
dd81a8d7 1653 return do_pppp_flags(s, a, &op);
516e246a
RH
1654}
1655
1656static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1657{
1658 tcg_gen_and_i64(pd, pn, pm);
1659 tcg_gen_andc_i64(pd, pg, pd);
1660}
1661
1662static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1663 TCGv_vec pm, TCGv_vec pg)
1664{
1665 tcg_gen_and_vec(vece, pd, pn, pm);
1666 tcg_gen_andc_vec(vece, pd, pg, pd);
1667}
1668
3a7be554 1669static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1670{
1671 static const GVecGen4 op = {
1672 .fni8 = gen_nand_pg_i64,
1673 .fniv = gen_nand_pg_vec,
1674 .fno = gen_helper_sve_nand_pppp,
1675 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1676 };
dd81a8d7 1677 return do_pppp_flags(s, a, &op);
516e246a
RH
1678}
1679
9e18d7a6
RH
1680/*
1681 *** SVE Predicate Misc Group
1682 */
1683
3a7be554 1684static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
9e18d7a6
RH
1685{
1686 if (sve_access_check(s)) {
1687 int nofs = pred_full_reg_offset(s, a->rn);
1688 int gofs = pred_full_reg_offset(s, a->pg);
1689 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1690
1691 if (words == 1) {
1692 TCGv_i64 pn = tcg_temp_new_i64();
1693 TCGv_i64 pg = tcg_temp_new_i64();
1694
1695 tcg_gen_ld_i64(pn, cpu_env, nofs);
1696 tcg_gen_ld_i64(pg, cpu_env, gofs);
1697 do_predtest1(pn, pg);
1698
1699 tcg_temp_free_i64(pn);
1700 tcg_temp_free_i64(pg);
1701 } else {
1702 do_predtest(s, nofs, gofs, words);
1703 }
1704 }
1705 return true;
1706}
1707
028e2a7b
RH
/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    /* Fixed-count patterns yield 0 if the vector is too short. */
    return elements >= bound ? bound : 0;
}
1745
1746/* This handles all of the predicate initialization instructions,
1747 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1748 * so that decode_pred_count returns 0. For SETFFR, we will have
1749 * set RD == 16 == FFR.
1750 */
1751static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1752{
1753 if (!sve_access_check(s)) {
1754 return true;
1755 }
1756
1757 unsigned fullsz = vec_full_reg_size(s);
1758 unsigned ofs = pred_full_reg_offset(s, rd);
1759 unsigned numelem, setsz, i;
1760 uint64_t word, lastword;
1761 TCGv_i64 t;
1762
1763 numelem = decode_pred_count(fullsz, pat, esz);
1764
1765 /* Determine what we must store into each bit, and how many. */
1766 if (numelem == 0) {
1767 lastword = word = 0;
1768 setsz = fullsz;
1769 } else {
1770 setsz = numelem << esz;
1771 lastword = word = pred_esz_masks[esz];
1772 if (setsz % 64) {
973558a3 1773 lastword &= MAKE_64BIT_MASK(0, setsz % 64);
028e2a7b
RH
1774 }
1775 }
1776
1777 t = tcg_temp_new_i64();
1778 if (fullsz <= 64) {
1779 tcg_gen_movi_i64(t, lastword);
1780 tcg_gen_st_i64(t, cpu_env, ofs);
1781 goto done;
1782 }
1783
1784 if (word == lastword) {
1785 unsigned maxsz = size_for_gvec(fullsz / 8);
1786 unsigned oprsz = size_for_gvec(setsz / 8);
1787
1788 if (oprsz * 8 == setsz) {
8711e71f 1789 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
028e2a7b
RH
1790 goto done;
1791 }
028e2a7b
RH
1792 }
1793
1794 setsz /= 8;
1795 fullsz /= 8;
1796
1797 tcg_gen_movi_i64(t, word);
973558a3 1798 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
028e2a7b
RH
1799 tcg_gen_st_i64(t, cpu_env, ofs + i);
1800 }
1801 if (lastword != word) {
1802 tcg_gen_movi_i64(t, lastword);
1803 tcg_gen_st_i64(t, cpu_env, ofs + i);
1804 i += 8;
1805 }
1806 if (i < fullsz) {
1807 tcg_gen_movi_i64(t, 0);
1808 for (; i < fullsz; i += 8) {
1809 tcg_gen_st_i64(t, cpu_env, ofs + i);
1810 }
1811 }
1812
1813 done:
1814 tcg_temp_free_i64(t);
1815
1816 /* PTRUES */
1817 if (setflag) {
1818 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1819 tcg_gen_movi_i32(cpu_CF, word == 0);
1820 tcg_gen_movi_i32(cpu_VF, 0);
1821 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1822 }
1823 return true;
1824}
1825
3a7be554 1826static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
028e2a7b
RH
1827{
1828 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1829}
1830
3a7be554 1831static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
028e2a7b
RH
1832{
1833 /* Note pat == 31 is #all, to set all elements. */
1834 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1835}
1836
3a7be554 1837static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
028e2a7b
RH
1838{
1839 /* Note pat == 32 is #unimp, to set no elements. */
1840 return do_predset(s, 0, a->rd, 32, false);
1841}
1842
3a7be554 1843static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
028e2a7b
RH
1844{
1845 /* The path through do_pppp_flags is complicated enough to want to avoid
1846 * duplication. Frob the arguments into the form of a predicated AND.
1847 */
1848 arg_rprr_s alt_a = {
1849 .rd = a->rd, .pg = a->pg, .s = a->s,
1850 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1851 };
3a7be554 1852 return trans_AND_pppp(s, &alt_a);
028e2a7b
RH
1853}
1854
3a7be554 1855static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
028e2a7b
RH
1856{
1857 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1858}
1859
3a7be554 1860static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
028e2a7b
RH
1861{
1862 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1863}
1864
1865static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1866 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1867 TCGv_ptr, TCGv_i32))
1868{
1869 if (!sve_access_check(s)) {
1870 return true;
1871 }
1872
1873 TCGv_ptr t_pd = tcg_temp_new_ptr();
1874 TCGv_ptr t_pg = tcg_temp_new_ptr();
1875 TCGv_i32 t;
86300b5d 1876 unsigned desc = 0;
028e2a7b 1877
86300b5d
RH
1878 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
1879 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
028e2a7b
RH
1880
1881 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1882 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1883 t = tcg_const_i32(desc);
1884
1885 gen_fn(t, t_pd, t_pg, t);
1886 tcg_temp_free_ptr(t_pd);
1887 tcg_temp_free_ptr(t_pg);
1888
1889 do_pred_flags(t);
1890 tcg_temp_free_i32(t);
1891 return true;
1892}
1893
3a7be554 1894static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
028e2a7b
RH
1895{
1896 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1897}
1898
3a7be554 1899static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
028e2a7b
RH
1900{
1901 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1902}
1903
24e82e68
RH
1904/*
1905 *** SVE Element Count Group
1906 */
1907
1908/* Perform an inline saturating addition of a 32-bit value within
1909 * a 64-bit register. The second operand is known to be positive,
1910 * which halves the comparisions we must perform to bound the result.
1911 */
1912static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1913{
1914 int64_t ibound;
24e82e68
RH
1915
1916 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1917 if (u) {
1918 tcg_gen_ext32u_i64(reg, reg);
1919 } else {
1920 tcg_gen_ext32s_i64(reg, reg);
1921 }
1922 if (d) {
1923 tcg_gen_sub_i64(reg, reg, val);
1924 ibound = (u ? 0 : INT32_MIN);
aa5b0b29 1925 tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
24e82e68
RH
1926 } else {
1927 tcg_gen_add_i64(reg, reg, val);
1928 ibound = (u ? UINT32_MAX : INT32_MAX);
aa5b0b29 1929 tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
24e82e68 1930 }
24e82e68
RH
1931}
1932
1933/* Similarly with 64-bit values. */
1934static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1935{
1936 TCGv_i64 t0 = tcg_temp_new_i64();
24e82e68
RH
1937 TCGv_i64 t2;
1938
1939 if (u) {
1940 if (d) {
1941 tcg_gen_sub_i64(t0, reg, val);
35a1ec8e
PMD
1942 t2 = tcg_constant_i64(0);
1943 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
24e82e68
RH
1944 } else {
1945 tcg_gen_add_i64(t0, reg, val);
35a1ec8e
PMD
1946 t2 = tcg_constant_i64(-1);
1947 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
24e82e68
RH
1948 }
1949 } else {
35a1ec8e 1950 TCGv_i64 t1 = tcg_temp_new_i64();
24e82e68
RH
1951 if (d) {
1952 /* Detect signed overflow for subtraction. */
1953 tcg_gen_xor_i64(t0, reg, val);
1954 tcg_gen_sub_i64(t1, reg, val);
7a31e0c6 1955 tcg_gen_xor_i64(reg, reg, t1);
24e82e68
RH
1956 tcg_gen_and_i64(t0, t0, reg);
1957
1958 /* Bound the result. */
1959 tcg_gen_movi_i64(reg, INT64_MIN);
35a1ec8e 1960 t2 = tcg_constant_i64(0);
24e82e68
RH
1961 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1962 } else {
1963 /* Detect signed overflow for addition. */
1964 tcg_gen_xor_i64(t0, reg, val);
1965 tcg_gen_add_i64(reg, reg, val);
1966 tcg_gen_xor_i64(t1, reg, val);
1967 tcg_gen_andc_i64(t0, t1, t0);
1968
1969 /* Bound the result. */
1970 tcg_gen_movi_i64(t1, INT64_MAX);
35a1ec8e 1971 t2 = tcg_constant_i64(0);
24e82e68
RH
1972 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1973 }
35a1ec8e 1974 tcg_temp_free_i64(t1);
24e82e68
RH
1975 }
1976 tcg_temp_free_i64(t0);
24e82e68
RH
1977}
1978
1979/* Similarly with a vector and a scalar operand. */
1980static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1981 TCGv_i64 val, bool u, bool d)
1982{
1983 unsigned vsz = vec_full_reg_size(s);
1984 TCGv_ptr dptr, nptr;
1985 TCGv_i32 t32, desc;
1986 TCGv_i64 t64;
1987
1988 dptr = tcg_temp_new_ptr();
1989 nptr = tcg_temp_new_ptr();
1990 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1991 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1992 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1993
1994 switch (esz) {
1995 case MO_8:
1996 t32 = tcg_temp_new_i32();
1997 tcg_gen_extrl_i64_i32(t32, val);
1998 if (d) {
1999 tcg_gen_neg_i32(t32, t32);
2000 }
2001 if (u) {
2002 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
2003 } else {
2004 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
2005 }
2006 tcg_temp_free_i32(t32);
2007 break;
2008
2009 case MO_16:
2010 t32 = tcg_temp_new_i32();
2011 tcg_gen_extrl_i64_i32(t32, val);
2012 if (d) {
2013 tcg_gen_neg_i32(t32, t32);
2014 }
2015 if (u) {
2016 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
2017 } else {
2018 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
2019 }
2020 tcg_temp_free_i32(t32);
2021 break;
2022
2023 case MO_32:
2024 t64 = tcg_temp_new_i64();
2025 if (d) {
2026 tcg_gen_neg_i64(t64, val);
2027 } else {
2028 tcg_gen_mov_i64(t64, val);
2029 }
2030 if (u) {
2031 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
2032 } else {
2033 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
2034 }
2035 tcg_temp_free_i64(t64);
2036 break;
2037
2038 case MO_64:
2039 if (u) {
2040 if (d) {
2041 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
2042 } else {
2043 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
2044 }
2045 } else if (d) {
2046 t64 = tcg_temp_new_i64();
2047 tcg_gen_neg_i64(t64, val);
2048 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
2049 tcg_temp_free_i64(t64);
2050 } else {
2051 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
2052 }
2053 break;
2054
2055 default:
2056 g_assert_not_reached();
2057 }
2058
2059 tcg_temp_free_ptr(dptr);
2060 tcg_temp_free_ptr(nptr);
2061 tcg_temp_free_i32(desc);
2062}
2063
3a7be554 2064static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
2065{
2066 if (sve_access_check(s)) {
2067 unsigned fullsz = vec_full_reg_size(s);
2068 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2069 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2070 }
2071 return true;
2072}
2073
3a7be554 2074static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2075{
2076 if (sve_access_check(s)) {
2077 unsigned fullsz = vec_full_reg_size(s);
2078 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2079 int inc = numelem * a->imm * (a->d ? -1 : 1);
2080 TCGv_i64 reg = cpu_reg(s, a->rd);
2081
2082 tcg_gen_addi_i64(reg, reg, inc);
2083 }
2084 return true;
2085}
2086
3a7be554 2087static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2088{
2089 if (!sve_access_check(s)) {
2090 return true;
2091 }
2092
2093 unsigned fullsz = vec_full_reg_size(s);
2094 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2095 int inc = numelem * a->imm;
2096 TCGv_i64 reg = cpu_reg(s, a->rd);
2097
2098 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
2099 if (inc == 0) {
2100 if (a->u) {
2101 tcg_gen_ext32u_i64(reg, reg);
2102 } else {
2103 tcg_gen_ext32s_i64(reg, reg);
2104 }
2105 } else {
d681f125 2106 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2107 }
2108 return true;
2109}
2110
3a7be554 2111static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2112{
2113 if (!sve_access_check(s)) {
2114 return true;
2115 }
2116
2117 unsigned fullsz = vec_full_reg_size(s);
2118 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2119 int inc = numelem * a->imm;
2120 TCGv_i64 reg = cpu_reg(s, a->rd);
2121
2122 if (inc != 0) {
d681f125 2123 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2124 }
2125 return true;
2126}
2127
3a7be554 2128static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2129{
2130 if (a->esz == 0) {
2131 return false;
2132 }
2133
2134 unsigned fullsz = vec_full_reg_size(s);
2135 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2136 int inc = numelem * a->imm;
2137
2138 if (inc != 0) {
2139 if (sve_access_check(s)) {
24e82e68
RH
2140 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
2141 vec_full_reg_offset(s, a->rn),
d681f125
RH
2142 tcg_constant_i64(a->d ? -inc : inc),
2143 fullsz, fullsz);
24e82e68
RH
2144 }
2145 } else {
2146 do_mov_z(s, a->rd, a->rn);
2147 }
2148 return true;
2149}
2150
3a7be554 2151static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2152{
2153 if (a->esz == 0) {
2154 return false;
2155 }
2156
2157 unsigned fullsz = vec_full_reg_size(s);
2158 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2159 int inc = numelem * a->imm;
2160
2161 if (inc != 0) {
2162 if (sve_access_check(s)) {
d681f125
RH
2163 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
2164 tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2165 }
2166 } else {
2167 do_mov_z(s, a->rd, a->rn);
2168 }
2169 return true;
2170}
2171
e1fa1164
RH
2172/*
2173 *** SVE Bitwise Immediate Group
2174 */
2175
/*
 * Decode the AArch64 logical-immediate encoding packed in a->dbm
 * (N:immr:imms) and apply GVEC_FN as Zd = Zn OP imm.  Returns false
 * when the immediate field is not a valid bitmask encoding.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The expanded mask is replicated; operate on 64-bit lanes.  */
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

/* AND (immediate).  */
static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

/* ORR (immediate).  */
static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

/* EOR (immediate).  */
static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}
2206
3a7be554 2207static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2208{
2209 uint64_t imm;
2210 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2211 extract32(a->dbm, 0, 6),
2212 extract32(a->dbm, 6, 6))) {
2213 return false;
2214 }
2215 if (sve_access_check(s)) {
2216 do_dupi_z(s, a->rd, imm);
2217 }
2218 return true;
2219}
2220
f25a2361
RH
2221/*
2222 *** SVE Integer Wide Immediate - Predicated Group
2223 */
2224
2225/* Implement all merging copies. This is used for CPY (immediate),
2226 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2227 */
2228static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2229 TCGv_i64 val)
2230{
2231 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2232 static gen_cpy * const fns[4] = {
2233 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2234 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2235 };
2236 unsigned vsz = vec_full_reg_size(s);
2237 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2238 TCGv_ptr t_zd = tcg_temp_new_ptr();
2239 TCGv_ptr t_zn = tcg_temp_new_ptr();
2240 TCGv_ptr t_pg = tcg_temp_new_ptr();
2241
2242 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2243 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2244 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2245
2246 fns[esz](t_zd, t_zn, t_pg, val, desc);
2247
2248 tcg_temp_free_ptr(t_zd);
2249 tcg_temp_free_ptr(t_zn);
2250 tcg_temp_free_ptr(t_pg);
2251 tcg_temp_free_i32(desc);
2252}
2253
/* FCPY: merging copy of a floating-point immediate to active elements.  */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}

/* CPY (immediate), merging form.  */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    /* NOTE(review): insn bit 13 looks like the immediate-shift field;
     * rejecting it for byte elements presumably matches the unallocated
     * encoding — confirm against the decode tables.
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    }
    return true;
}
2277
/* CPY (immediate), zeroing form: inactive elements of Zd become zero.  */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    /* NOTE(review): same bit-13 (shifted immediate) rejection for byte
     * elements as trans_CPY_m_i — confirm against the decode tables.
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2297
b94f8f60
RH
2298/*
2299 *** SVE Permute Extract Group
2300 */
2301
/*
 * EXT: extract a vector formed from the concatenation Zm:Zn starting
 * at byte offset IMM.  An out-of-range offset selects Zn unchanged.
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;           /* bytes taken from Zn */
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Copy the tail of Zn, then (if any) the head of Zm.  */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper.  */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

/* EXT (SVE1): Zd = ext(Zn, Zm, imm).  */
static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
}

/* EXT (SVE2, constructive): the second source is the pair register Zn+1.  */
static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
}
2344
30562ab7
RH
2345/*
2346 *** SVE Permute - Unpredicated Group
2347 */
2348
3a7be554 2349static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2350{
2351 if (sve_access_check(s)) {
2352 unsigned vsz = vec_full_reg_size(s);
2353 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2354 vsz, vsz, cpu_reg_sp(s, a->rn));
2355 }
2356 return true;
2357}
2358
/*
 * DUP (indexed): broadcast element Zn[index] to Zd.  The combined
 * imm field encodes both the element size (position of the lowest
 * set bit) and the index (the bits above it).
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        /* tsz == 0: unallocated encoding.  */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* Element size from the lowest set bit; index from bits above.  */
        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2385
2386static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2387{
2388 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2389 static gen_insr * const fns[4] = {
2390 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2391 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2392 };
2393 unsigned vsz = vec_full_reg_size(s);
2394 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2395 TCGv_ptr t_zd = tcg_temp_new_ptr();
2396 TCGv_ptr t_zn = tcg_temp_new_ptr();
2397
2398 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2399 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2400
2401 fns[a->esz](t_zd, t_zn, val, desc);
2402
2403 tcg_temp_free_ptr(t_zd);
2404 tcg_temp_free_ptr(t_zn);
2405 tcg_temp_free_i32(desc);
2406}
2407
/* INSR (SIMD&FP scalar): insert the low 64 bits of Vm.  */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* INSR (scalar): insert the general register Xm.  */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

/* REV (vector): reverse the order of the elements of Zn.  */
static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
    }
    return true;
}

/* TBL: table-lookup permute of Zn by the indices in Zm.  */
static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}
2452
80a712a2
SL
/* TBL (SVE2): two-source table lookup using the register pair Zn:Zn+1.  */
static bool trans_TBL_sve2(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
        gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        /* The second table register is the pair register Zn+1.  */
        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn,
                          (a->rn + 1) % 32, a->rm, 0);
    }
    return true;
}

/* TBX (SVE2): table lookup merging into the destination.  */
static bool trans_TBX(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
        gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}

/*
 * SUNPKLO/SUNPKHI/UUNPKLO/UUNPKHI: widen the low (h == 0) or high
 * (h == 1) half of Zn into Zd; a->u selects unsigned extension.
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        /* No element size below byte to unpack from.  */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Source offset selects the low or high half of Zn.  */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2507
d731d8cb
RH
2508/*
2509 *** SVE Permute - Predicates Group
2510 */
2511
2512static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2513 gen_helper_gvec_3 *fn)
2514{
2515 if (!sve_access_check(s)) {
2516 return true;
2517 }
2518
2519 unsigned vsz = pred_full_reg_size(s);
2520
d731d8cb
RH
2521 TCGv_ptr t_d = tcg_temp_new_ptr();
2522 TCGv_ptr t_n = tcg_temp_new_ptr();
2523 TCGv_ptr t_m = tcg_temp_new_ptr();
2524 TCGv_i32 t_desc;
f9b0fcce 2525 uint32_t desc = 0;
d731d8cb 2526
f9b0fcce
RH
2527 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2528 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2529 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2530
2531 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2532 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2533 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2534 t_desc = tcg_const_i32(desc);
2535
2536 fn(t_d, t_n, t_m, t_desc);
2537
2538 tcg_temp_free_ptr(t_d);
2539 tcg_temp_free_ptr(t_n);
2540 tcg_temp_free_ptr(t_m);
2541 tcg_temp_free_i32(t_desc);
2542 return true;
2543}
2544
2545static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2546 gen_helper_gvec_2 *fn)
2547{
2548 if (!sve_access_check(s)) {
2549 return true;
2550 }
2551
2552 unsigned vsz = pred_full_reg_size(s);
2553 TCGv_ptr t_d = tcg_temp_new_ptr();
2554 TCGv_ptr t_n = tcg_temp_new_ptr();
2555 TCGv_i32 t_desc;
70acaafe 2556 uint32_t desc = 0;
d731d8cb
RH
2557
2558 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2559 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2560
70acaafe
RH
2561 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2562 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2563 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2564 t_desc = tcg_const_i32(desc);
2565
2566 fn(t_d, t_n, t_desc);
2567
2568 tcg_temp_free_i32(t_desc);
2569 tcg_temp_free_ptr(t_d);
2570 tcg_temp_free_ptr(t_n);
2571 return true;
2572}
2573
/* ZIP1/ZIP2 (predicates): interleave from the low/high halves.  */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

/* UZP1/UZP2 (predicates): concatenate even/odd elements.  */
static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

/* TRN1/TRN2 (predicates): transpose even/odd element pairs.  */
static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

/* REV (predicate): reverse the order of the predicate elements.  */
static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

/* PUNPKLO/PUNPKHI: unpack the low/high half of the predicate.  */
static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
2618
234b48e9
RH
2619/*
2620 *** SVE Permute - Interleaving Group
2621 */
2622
/*
 * ZIP1/ZIP2 (vectors): interleave elements taken from the low (ZIP1)
 * or high (ZIP2) halves of Zn and Zm.
 */
static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Offset both sources into their high halves for ZIP2.  */
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/* Generic expansion of a three-Zreg out-of-line helper with DATA.  */
static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
    }
    return true;
}
2649
static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}

/*
 * ZIP1/ZIP2 (quadwords, F64MM): interleave 128-bit elements.  The
 * half offset is aligned down to a multiple of 16 bytes.
 */
static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, gen_helper_sve2_zip_q);
    }
    return true;
}

static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, false);
}

static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, true);
}
2685
234b48e9
RH
/* UZP1/UZP2: concatenate even (DATA == 0) or odd (DATA == element size)
 * numbered elements of Zn and Zm.  The helper's DATA is the starting
 * byte offset within the element pair.
 */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

/* UZP1/UZP2 (quadwords, F64MM): 128-bit elements, so DATA is 0 or 16.  */
static bool trans_UZP1_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_uzp_q);
}

static bool trans_UZP2_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_uzp_q);
}

/* TRN1/TRN2: transpose even/odd element pairs of Zn and Zm.  */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}

/* TRN1/TRN2 (quadwords, F64MM).  */
static bool trans_TRN1_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_trn_q);
}

static bool trans_TRN2_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_trn_q);
}
2747
3ca879ae
RH
2748/*
2749 *** SVE Permute Vector - Predicated Group
2750 */
2751
/* COMPACT: pack active elements to the low end of Zd.  Only word and
 * double-word element sizes are allocated.
 */
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2759
ef23cb72
RH
2760/* Call the helper that computes the ARM LastActiveElement pseudocode
2761 * function, scaled by the element size. This includes the not found
2762 * indication; e.g. not found for esz=3 is -8.
2763 */
2764static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2765{
2766 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2767 * round up, as we do elsewhere, because we need the exact size.
2768 */
2769 TCGv_ptr t_p = tcg_temp_new_ptr();
2770 TCGv_i32 t_desc;
2acbfbe4 2771 unsigned desc = 0;
ef23cb72 2772
2acbfbe4
RH
2773 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2774 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
ef23cb72
RH
2775
2776 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2777 t_desc = tcg_const_i32(desc);
2778
2779 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2780
2781 tcg_temp_free_i32(t_desc);
2782 tcg_temp_free_ptr(t_p);
2783}
2784
2785/* Increment LAST to the offset of the next element in the vector,
2786 * wrapping around to 0.
2787 */
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-2 vector size: wrap with a mask.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select 0 when the incremented offset reaches vsz.  */
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* -1 masks to the last element offset for power-of-2 sizes.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select the final element offset when LAST < 0.  */
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}
2815
/* Load an unsigned element of ESZ from BASE+OFS.  Returns a new temp
 * that the caller must free.
 */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
2839
/* Load an unsigned element of ESZ from RM[LAST].  LAST is a byte offset
 * into the vector and is clobbered on big-endian hosts.  Returns a new
 * temp that the caller must free.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering.  See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2865
/* Compute CLAST for a Zreg: broadcast the element after (CLASTA) or at
 * (CLASTB) the last active element to all of Zd; with no active element,
 * Zd becomes a copy of Zn (the MOVPRFX source).
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Local temp: the value must survive the brcond below.  */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}
2923
/* Compute CLAST for a scalar: REG_VAL receives the selected element,
 * or keeps its original value when no element is active.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
                        ele, reg_val);

    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}

/* Compute CLAST for a Vreg: the low element of Vd is replaced, the
 * remainder of the register is zeroed by write_fp_dreg.
 */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}
2980
/* Compute CLAST for a Xreg: Xd is first zero-extended to the element
 * width so the "keep original" case produces the architected result.
 */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        /* Already full width.  */
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, true);
}
3020
/* Compute LAST for a scalar: returns a new temp holding the element
 * at (LASTB) or after (LASTA) the last active element, wrapping around
 * the vector.  Caller frees the result.
 */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        /* LASTB: a not-found (-n) index wraps to the final element.  */
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}

/* Compute LAST for a Vreg. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, true);
}

/* Compute LAST for a Xreg. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, true);
}
3081
/* CPY (scalar): merging copy of Xn/SP to active elements of Zd.  */
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/* CPY (SIMD&FP scalar): merging copy of Vn element 0 to active elements.  */
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
3100
/* REVB: reverse bytes within each element (h/s/d only).  */
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* REVH: reverse half-words within each element (s/d only).  */
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* REVW: reverse words within each double-word element (d only).  */
static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}

/* RBIT: reverse bits within each element.  */
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
3138
/* SPLICE: select the active segment of Zn and append from Zm.  */
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                          a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

/* SPLICE (SVE2, constructive): the second source is the pair Zn+1.  */
static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                          a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
    }
    return true;
}
3159
757f9cff
RH
3160/*
3161 *** SVE Integer Compare - Vectors Group
3162 */
3163
/* Expand a predicated two-vector compare that also sets NZCV.
 * Note that T serves as both the descriptor input and the flags
 * output of the helper (gen_fn writes its first argument), so it
 * must remain a mutable temp and cannot be a tcg_constant.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        /* Unallocated element size for this compare.  */
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    /* T is descriptor in, flags out.  */
    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    /* Copy the returned flags result into NZCV.  */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

/* Integer compare, vector vs vector, all four element sizes.  */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/* Integer compare, vector vs wide (64-bit) elements; no 'd' form.  */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
3244
38cadeba
RH
3245/*
3246 *** SVE Integer Compare - Immediate Groups
3247 */
3248
3249static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
3250 gen_helper_gvec_flags_3 *gen_fn)
3251{
3252 TCGv_ptr pd, zn, pg;
3253 unsigned vsz;
3254 TCGv_i32 t;
3255
3256 if (gen_fn == NULL) {
3257 return false;
3258 }
3259 if (!sve_access_check(s)) {
3260 return true;
3261 }
3262
3263 vsz = vec_full_reg_size(s);
3264 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
3265 pd = tcg_temp_new_ptr();
3266 zn = tcg_temp_new_ptr();
3267 pg = tcg_temp_new_ptr();
3268
3269 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
3270 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
3271 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3272
3273 gen_fn(t, pd, zn, pg, t);
3274
3275 tcg_temp_free_ptr(pd);
3276 tcg_temp_free_ptr(zn);
3277 tcg_temp_free_ptr(pg);
3278
3279 do_pred_flags(t);
3280
3281 tcg_temp_free_i32(t);
3282 return true;
3283}
3284
/* Expand a compare against an immediate for every element size. */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
{ \
    static gen_helper_gvec_flags_3 * const fns[4] = { \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
    }; \
    return do_ppzi_flags(s, a, fns[a->esz]); \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
3307
35da316f
RH
3308/*
3309 *** SVE Partition Break Group
3310 */
3311
3312static bool do_brk3(DisasContext *s, arg_rprr_s *a,
3313 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
3314{
3315 if (!sve_access_check(s)) {
3316 return true;
3317 }
3318
3319 unsigned vsz = pred_full_reg_size(s);
3320
3321 /* Predicate sizes may be smaller and cannot use simd_desc. */
3322 TCGv_ptr d = tcg_temp_new_ptr();
3323 TCGv_ptr n = tcg_temp_new_ptr();
3324 TCGv_ptr m = tcg_temp_new_ptr();
3325 TCGv_ptr g = tcg_temp_new_ptr();
04c774a2 3326 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3327
3328 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3329 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3330 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
3331 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3332
3333 if (a->s) {
3334 fn_s(t, d, n, m, g, t);
3335 do_pred_flags(t);
3336 } else {
3337 fn(d, n, m, g, t);
3338 }
3339 tcg_temp_free_ptr(d);
3340 tcg_temp_free_ptr(n);
3341 tcg_temp_free_ptr(m);
3342 tcg_temp_free_ptr(g);
3343 tcg_temp_free_i32(t);
3344 return true;
3345}
3346
3347static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3348 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3349{
3350 if (!sve_access_check(s)) {
3351 return true;
3352 }
3353
3354 unsigned vsz = pred_full_reg_size(s);
3355
3356 /* Predicate sizes may be smaller and cannot use simd_desc. */
3357 TCGv_ptr d = tcg_temp_new_ptr();
3358 TCGv_ptr n = tcg_temp_new_ptr();
3359 TCGv_ptr g = tcg_temp_new_ptr();
04c774a2 3360 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3361
3362 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3363 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3364 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3365
3366 if (a->s) {
3367 fn_s(t, d, n, g, t);
3368 do_pred_flags(t);
3369 } else {
3370 fn(d, n, g, t);
3371 }
3372 tcg_temp_free_ptr(d);
3373 tcg_temp_free_ptr(n);
3374 tcg_temp_free_ptr(g);
3375 tcg_temp_free_i32(t);
3376 return true;
3377}
3378
/* BRKPA: break after first true, propagating from the pair. */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}
3383
/* BRKPB: break before first true, propagating from the pair. */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}
3388
/* BRKA, merging form. */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}
3393
/* BRKB, merging form. */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}
3398
/* BRKA, zeroing form. */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}
3403
/* BRKB, zeroing form. */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}
3408
/* BRKN: propagate break to next partition. */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3413
9ee3a611
RH
3414/*
3415 *** SVE Predicate Count Group
3416 */
3417
3418static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3419{
3420 unsigned psz = pred_full_reg_size(s);
3421
3422 if (psz <= 8) {
3423 uint64_t psz_mask;
3424
3425 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3426 if (pn != pg) {
3427 TCGv_i64 g = tcg_temp_new_i64();
3428 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3429 tcg_gen_and_i64(val, val, g);
3430 tcg_temp_free_i64(g);
3431 }
3432
3433 /* Reduce the pred_esz_masks value simply to reduce the
3434 * size of the code generated here.
3435 */
3436 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3437 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3438
3439 tcg_gen_ctpop_i64(val, val);
3440 } else {
3441 TCGv_ptr t_pn = tcg_temp_new_ptr();
3442 TCGv_ptr t_pg = tcg_temp_new_ptr();
f556a201 3443 unsigned desc = 0;
9ee3a611
RH
3444 TCGv_i32 t_desc;
3445
f556a201
RH
3446 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
3447 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
9ee3a611
RH
3448
3449 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3450 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3451 t_desc = tcg_const_i32(desc);
3452
3453 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3454 tcg_temp_free_ptr(t_pn);
3455 tcg_temp_free_ptr(t_pg);
3456 tcg_temp_free_i32(t_desc);
3457 }
3458}
3459
/* CNTP: write the active-element count of Pn/Pg into Xd. */
static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
{
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}
3467
/* INCP/DECP (scalar): add or subtract (a->d) the count of active
 * predicate elements to/from the Xd register. */
static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}
3484
3a7be554 3485static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3486{
3487 if (a->esz == 0) {
3488 return false;
3489 }
3490 if (sve_access_check(s)) {
3491 unsigned vsz = vec_full_reg_size(s);
3492 TCGv_i64 val = tcg_temp_new_i64();
3493 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3494
3495 do_cntp(s, val, a->esz, a->pg, a->pg);
3496 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3497 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3498 }
3499 return true;
3500}
3501
3a7be554 3502static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3503{
3504 if (sve_access_check(s)) {
3505 TCGv_i64 reg = cpu_reg(s, a->rd);
3506 TCGv_i64 val = tcg_temp_new_i64();
3507
3508 do_cntp(s, val, a->esz, a->pg, a->pg);
3509 do_sat_addsub_32(reg, val, a->u, a->d);
3510 }
3511 return true;
3512}
3513
3a7be554 3514static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3515{
3516 if (sve_access_check(s)) {
3517 TCGv_i64 reg = cpu_reg(s, a->rd);
3518 TCGv_i64 val = tcg_temp_new_i64();
3519
3520 do_cntp(s, val, a->esz, a->pg, a->pg);
3521 do_sat_addsub_64(reg, val, a->u, a->d);
3522 }
3523 return true;
3524}
3525
3a7be554 3526static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3527{
3528 if (a->esz == 0) {
3529 return false;
3530 }
3531 if (sve_access_check(s)) {
3532 TCGv_i64 val = tcg_temp_new_i64();
3533 do_cntp(s, val, a->esz, a->pg, a->pg);
3534 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3535 }
3536 return true;
3537}
3538
caf1cefc
RH
3539/*
3540 *** SVE Integer Compare Scalars Group
3541 */
3542
/*
 * CTERMEQ/CTERMNE: compare scalars and set NZCV.  NF holds the compare
 * result; VF is computed as !NF & !CF (CF was set by a prior insn).
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3567
/*
 * WHILE{LE,LT,GE,GT}{,U}: construct a predicate of the first N active
 * elements, where N is derived from comparing two scalars.  The
 * various conditions are compressed into a single iteration count
 * which is handed to the whilel/whileg helper.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit operands: extend according to signedness. */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length.  This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
3678
14f6dad1
RH
3679static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3680{
3681 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3682 TCGv_i32 t2;
14f6dad1
RH
3683 TCGv_ptr ptr;
3684 unsigned vsz = vec_full_reg_size(s);
3685 unsigned desc = 0;
3686
3687 if (!dc_isar_feature(aa64_sve2, s)) {
3688 return false;
3689 }
3690 if (!sve_access_check(s)) {
3691 return true;
3692 }
3693
3694 op0 = read_cpu_reg(s, a->rn, 1);
3695 op1 = read_cpu_reg(s, a->rm, 1);
3696
4481bbf2 3697 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3698 diff = tcg_temp_new_i64();
3699
3700 if (a->rw) {
3701 /* WHILERW */
3702 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3703 t1 = tcg_temp_new_i64();
3704 tcg_gen_sub_i64(diff, op0, op1);
3705 tcg_gen_sub_i64(t1, op1, op0);
3706 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3707 tcg_temp_free_i64(t1);
3708 /* Round down to a multiple of ESIZE. */
3709 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3710 /* If op1 == op0, diff == 0, and the condition is always true. */
3711 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3712 } else {
3713 /* WHILEWR */
3714 tcg_gen_sub_i64(diff, op1, op0);
3715 /* Round down to a multiple of ESIZE. */
3716 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3717 /* If op0 >= op1, diff <= 0, the condition is always true. */
3718 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3719 }
3720
3721 /* Bound to the maximum. */
3722 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3723
3724 /* Since we're bounded, pass as a 32-bit type. */
3725 t2 = tcg_temp_new_i32();
3726 tcg_gen_extrl_i64_i32(t2, diff);
3727 tcg_temp_free_i64(diff);
3728
3729 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3730 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3731
3732 ptr = tcg_temp_new_ptr();
3733 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3734
4481bbf2 3735 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3736 do_pred_flags(t2);
3737
3738 tcg_temp_free_ptr(ptr);
3739 tcg_temp_free_i32(t2);
14f6dad1
RH
3740 return true;
3741}
3742
ed491961
RH
3743/*
3744 *** SVE Integer Wide Immediate - Unpredicated Group
3745 */
3746
3a7be554 3747static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3748{
3749 if (a->esz == 0) {
3750 return false;
3751 }
3752 if (sve_access_check(s)) {
3753 unsigned vsz = vec_full_reg_size(s);
3754 int dofs = vec_full_reg_offset(s, a->rd);
3755 uint64_t imm;
3756
3757 /* Decode the VFP immediate. */
3758 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3759 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3760 }
3761 return true;
3762}
3763
/* DUP (immediate): replicate imm across Zd.  The esz==0 encoding with
 * insn bit 13 set is reserved. */
static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);

        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
    }
    return true;
}
3777
/* ADD (vector, immediate).  Also the expansion target for SUB_zzi,
 * which negates its immediate and tail-calls here. */
static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
3790
/* SUB (vector, immediate): implemented as ADD of the negated immediate.
 * Note this mutates the decoded arg in place before delegating. */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3796
/*
 * SUBR (vector, immediate): reversed subtract, imm - Zn.  Expanded with
 * scalar_first so the immediate is the first operand of the subtraction.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* The esz==0 encoding with insn bit 13 set is reserved. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}
3839
3a7be554 3840static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3841{
3842 if (sve_access_check(s)) {
3843 unsigned vsz = vec_full_reg_size(s);
3844 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3845 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3846 }
3847 return true;
3848}
3849
/* Expand a saturating add/sub of an immediate: u selects unsigned
 * saturation, d selects subtraction. */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                          tcg_constant_i64(a->imm), u, d);
    }
    return true;
}
3861
/* SQADD (immediate): signed saturating add. */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}
3866
/* UQADD (immediate): unsigned saturating add. */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}
3871
/* SQSUB (immediate): signed saturating subtract. */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}
3876
/* UQSUB (immediate): unsigned saturating subtract. */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
3881
/* Expand a two-operand + immediate operation via an out-of-line helper. */
static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
    }
    return true;
}
3892
/* Integer min/max against an immediate, per element size. */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
{ \
    static gen_helper_gvec_2i * const fns[4] = { \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
    }; \
    return do_zzi_ool(s, a, fns[a->esz]); \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3909
/* SDOT/UDOT (vectors): helper selected by signedness (a->u) and
 * element size (a->sz). */
static bool trans_DOT_zzzz(DisasContext *s, arg_DOT_zzzz *a)
{
    static gen_helper_gvec_4 * const fns[2][2] = {
        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
3922
814d4c52
RH
3923/*
3924 * SVE Multiply - Indexed
3925 */
3926
0a82d963
RH
/* Expand a 4-operand indexed operation; a->index is passed as the
 * helper's data.  NULL fn means the encoding is invalid. */
static bool do_zzxz_ool(DisasContext *s, arg_rrxr_esz *a,
                        gen_helper_gvec_4 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
    }
    return true;
}
3938
0a82d963
RH
/* Indexed dot products. */
#define DO_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a) \
    { return do_zzxz_ool(s, a, FUNC); }

DO_RRXR(trans_SDOT_zzxw_s, gen_helper_gvec_sdot_idx_b)
DO_RRXR(trans_SDOT_zzxw_d, gen_helper_gvec_sdot_idx_h)
DO_RRXR(trans_UDOT_zzxw_s, gen_helper_gvec_udot_idx_b)
DO_RRXR(trans_UDOT_zzxw_d, gen_helper_gvec_udot_idx_h)
3947
2867039a
RH
/* SUDOT (indexed): requires the I8MM extension. */
static bool trans_SUDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    return do_zzxz_ool(s, a, gen_helper_gvec_sudot_idx_b);
}
3955
/* USDOT (indexed): requires the I8MM extension. */
static bool trans_USDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    return do_zzxz_ool(s, a, gen_helper_gvec_usdot_idx_b);
}
3963
0a82d963 3964#undef DO_RRXR
16fcfdc7 3965
814d4c52
RH
/* Expand an SVE2 three-operand out-of-line op with extra data; also
 * gates on the SVE2 feature. */
static bool do_sve2_zzz_data(DisasContext *s, int rd, int rn, int rm, int data,
                             gen_helper_gvec_3 *fn)
{
    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}
3981
/* SVE2 multiply by indexed element; a->index is passed as helper data. */
#define DO_SVE2_RRX(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a) \
    { return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, a->index, FUNC); }

DO_SVE2_RRX(trans_MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(trans_MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(trans_MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(trans_SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(trans_SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(trans_SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(trans_SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(trans_SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(trans_SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX
3999
b95f5eeb
RH
/* SVE2 widening multiply by indexed element: TOP selects the top/bottom
 * half, packed into bit 0 of the helper data alongside the index. */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a) \
    { \
        return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, \
                                (a->index << 1) | TOP, FUNC); \
    }

DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(trans_SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(trans_SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(trans_UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(trans_UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
4023
8a02aac7
RH
/* Expand an SVE2 four-operand out-of-line op with extra data; also
 * gates on the SVE2 feature. */
static bool do_sve2_zzzz_data(DisasContext *s, int rd, int rn, int rm, int ra,
                              int data, gen_helper_gvec_4 *fn)
{
    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}
4040
/* SVE2 multiply-add by indexed element. */
#define DO_SVE2_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a) \
    { return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, a->index, FUNC); }

DO_SVE2_RRXR(trans_MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(trans_MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(trans_MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(trans_MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(trans_MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(trans_MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
4062
c5c455d7
RH
/* SVE2 widening multiply-add by indexed element: TOP selects top/bottom
 * half in bit 0 of the helper data.  Note the accumulator is a->rd. */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a) \
    { \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->rd, \
                                 (a->index << 1) | TOP, FUNC); \
    }

DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
4101
3b787ed8
RH
/* SVE2 complex multiply-add by indexed element: the rotation is packed
 * into the low 2 bits of the helper data, below the index. */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
    { \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, \
                                 (a->index << 2) | a->rot, FUNC); \
    }

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
4119
ca40a6e6
RH
4120/*
4121 *** SVE Floating Point Multiply-Add Indexed Group
4122 */
4123
/* FMLA/FMLS (indexed): sub selects the negated (FMLS) form, packed into
 * bit 0 of the helper data below the index. */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses its own FP status (FZ16 etc). */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4145
0a82d963
RH
/* FMLA (indexed). */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, false);
}
4150
/* FMLS (indexed). */
static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, true);
}
4155
ca40a6e6
RH
4156/*
4157 *** SVE Floating Point Multiply Indexed Group
4158 */
4159
/* FMUL (indexed). */
static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses its own FP status (FZ16 etc). */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4179
23fbe79f
RH
4180/*
4181 *** SVE Floating Point Fast Reduction Group
4182 */
4183
4184typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
4185 TCGv_ptr, TCGv_i32);
4186
4187static void do_reduce(DisasContext *s, arg_rpr_esz *a,
4188 gen_helper_fp_reduce *fn)
4189{
4190 unsigned vsz = vec_full_reg_size(s);
4191 unsigned p2vsz = pow2ceil(vsz);
c648c9b7 4192 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
23fbe79f
RH
4193 TCGv_ptr t_zn, t_pg, status;
4194 TCGv_i64 temp;
4195
4196 temp = tcg_temp_new_i64();
4197 t_zn = tcg_temp_new_ptr();
4198 t_pg = tcg_temp_new_ptr();
4199
4200 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
4201 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 4202 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
23fbe79f
RH
4203
4204 fn(temp, t_zn, t_pg, status, t_desc);
4205 tcg_temp_free_ptr(t_zn);
4206 tcg_temp_free_ptr(t_pg);
4207 tcg_temp_free_ptr(status);
4208 tcg_temp_free_i32(t_desc);
4209
4210 write_fp_dreg(s, a->rd, temp);
4211 tcg_temp_free_i64(temp);
4212}
4213
/* FP horizontal reductions: half/single/double only (esz==0 invalid). */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
{ \
    static gen_helper_fp_reduce * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_reduce(s, a, fns[a->esz - 1]); \
    } \
    return true; \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
4236
3887c038
RH
4237/*
4238 *** SVE Floating Point Unary Operations - Unpredicated Group
4239 */
4240
/* Expand an unpredicated FP unary op with an FP-status pointer. */
static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    /* Half-precision uses its own FP status (FZ16 etc). */
    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}
4251
3a7be554 4252static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
4253{
4254 static gen_helper_gvec_2_ptr * const fns[3] = {
4255 gen_helper_gvec_frecpe_h,
4256 gen_helper_gvec_frecpe_s,
4257 gen_helper_gvec_frecpe_d,
4258 };
4259 if (a->esz == 0) {
4260 return false;
4261 }
4262 if (sve_access_check(s)) {
4263 do_zz_fp(s, a, fns[a->esz - 1]);
4264 }
4265 return true;
4266}
4267
3a7be554 4268static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
4269{
4270 static gen_helper_gvec_2_ptr * const fns[3] = {
4271 gen_helper_gvec_frsqrte_h,
4272 gen_helper_gvec_frsqrte_s,
4273 gen_helper_gvec_frsqrte_d,
4274 };
4275 if (a->esz == 0) {
4276 return false;
4277 }
4278 if (sve_access_check(s)) {
4279 do_zz_fp(s, a, fns[a->esz - 1]);
4280 }
4281 return true;
4282}
4283
4d2e2a03
RH
4284/*
4285 *** SVE Floating Point Compare with Zero Group
4286 */
4287
4288static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4289 gen_helper_gvec_3_ptr *fn)
4290{
4291 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4292 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
4293
4294 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
4295 vec_full_reg_offset(s, a->rn),
4296 pred_full_reg_offset(s, a->pg),
4297 status, vsz, vsz, 0, fn);
4298 tcg_temp_free_ptr(status);
4299}
4300
4301#define DO_PPZ(NAME, name) \
3a7be554 4302static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
4d2e2a03
RH
4303{ \
4304 static gen_helper_gvec_3_ptr * const fns[3] = { \
4305 gen_helper_sve_##name##_h, \
4306 gen_helper_sve_##name##_s, \
4307 gen_helper_sve_##name##_d, \
4308 }; \
4309 if (a->esz == 0) { \
4310 return false; \
4311 } \
4312 if (sve_access_check(s)) { \
4313 do_ppz_fp(s, a, fns[a->esz - 1]); \
4314 } \
4315 return true; \
4316}
4317
4318DO_PPZ(FCMGE_ppz0, fcmge0)
4319DO_PPZ(FCMGT_ppz0, fcmgt0)
4320DO_PPZ(FCMLE_ppz0, fcmle0)
4321DO_PPZ(FCMLT_ppz0, fcmlt0)
4322DO_PPZ(FCMEQ_ppz0, fcmeq0)
4323DO_PPZ(FCMNE_ppz0, fcmne0)
4324
4325#undef DO_PPZ
4326
67fcd9ad
RH
4327/*
4328 *** SVE floating-point trig multiply-add coefficient
4329 */
4330
3a7be554 4331static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
4332{
4333 static gen_helper_gvec_3_ptr * const fns[3] = {
4334 gen_helper_sve_ftmad_h,
4335 gen_helper_sve_ftmad_s,
4336 gen_helper_sve_ftmad_d,
4337 };
4338
4339 if (a->esz == 0) {
4340 return false;
4341 }
4342 if (sve_access_check(s)) {
4343 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4344 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
4345 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4346 vec_full_reg_offset(s, a->rn),
4347 vec_full_reg_offset(s, a->rm),
4348 status, vsz, vsz, a->imm, fns[a->esz - 1]);
4349 tcg_temp_free_ptr(status);
4350 }
4351 return true;
4352}
4353
7f9ddf64
RH
4354/*
4355 *** SVE Floating Point Accumulating Reduction Group
4356 */
4357
3a7be554 4358static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
4359{
4360 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
4361 TCGv_ptr, TCGv_ptr, TCGv_i32);
4362 static fadda_fn * const fns[3] = {
4363 gen_helper_sve_fadda_h,
4364 gen_helper_sve_fadda_s,
4365 gen_helper_sve_fadda_d,
4366 };
4367 unsigned vsz = vec_full_reg_size(s);
4368 TCGv_ptr t_rm, t_pg, t_fpst;
4369 TCGv_i64 t_val;
4370 TCGv_i32 t_desc;
4371
4372 if (a->esz == 0) {
4373 return false;
4374 }
4375 if (!sve_access_check(s)) {
4376 return true;
4377 }
4378
4379 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
4380 t_rm = tcg_temp_new_ptr();
4381 t_pg = tcg_temp_new_ptr();
4382 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
4383 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 4384 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7f9ddf64
RH
4385 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
4386
4387 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
4388
4389 tcg_temp_free_i32(t_desc);
4390 tcg_temp_free_ptr(t_fpst);
4391 tcg_temp_free_ptr(t_pg);
4392 tcg_temp_free_ptr(t_rm);
4393
4394 write_fp_dreg(s, a->rd, t_val);
4395 tcg_temp_free_i64(t_val);
4396 return true;
4397}
4398
29b80469
RH
4399/*
4400 *** SVE Floating Point Arithmetic - Unpredicated Group
4401 */
4402
4403static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
4404 gen_helper_gvec_3_ptr *fn)
4405{
4406 if (fn == NULL) {
4407 return false;
4408 }
4409 if (sve_access_check(s)) {
4410 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4411 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
4412 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4413 vec_full_reg_offset(s, a->rn),
4414 vec_full_reg_offset(s, a->rm),
4415 status, vsz, vsz, 0, fn);
4416 tcg_temp_free_ptr(status);
4417 }
4418 return true;
4419}
4420
4421
4422#define DO_FP3(NAME, name) \
3a7be554 4423static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
4424{ \
4425 static gen_helper_gvec_3_ptr * const fns[4] = { \
4426 NULL, gen_helper_gvec_##name##_h, \
4427 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
4428 }; \
4429 return do_zzz_fp(s, a, fns[a->esz]); \
4430}
4431
4432DO_FP3(FADD_zzz, fadd)
4433DO_FP3(FSUB_zzz, fsub)
4434DO_FP3(FMUL_zzz, fmul)
4435DO_FP3(FTSMUL, ftsmul)
4436DO_FP3(FRECPS, recps)
4437DO_FP3(FRSQRTS, rsqrts)
4438
4439#undef DO_FP3
4440
ec3b87c2
RH
4441/*
4442 *** SVE Floating Point Arithmetic - Predicated Group
4443 */
4444
4445static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
4446 gen_helper_gvec_4_ptr *fn)
4447{
4448 if (fn == NULL) {
4449 return false;
4450 }
4451 if (sve_access_check(s)) {
4452 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4453 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
4454 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4455 vec_full_reg_offset(s, a->rn),
4456 vec_full_reg_offset(s, a->rm),
4457 pred_full_reg_offset(s, a->pg),
4458 status, vsz, vsz, 0, fn);
4459 tcg_temp_free_ptr(status);
4460 }
4461 return true;
4462}
4463
4464#define DO_FP3(NAME, name) \
3a7be554 4465static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
4466{ \
4467 static gen_helper_gvec_4_ptr * const fns[4] = { \
4468 NULL, gen_helper_sve_##name##_h, \
4469 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4470 }; \
4471 return do_zpzz_fp(s, a, fns[a->esz]); \
4472}
4473
4474DO_FP3(FADD_zpzz, fadd)
4475DO_FP3(FSUB_zpzz, fsub)
4476DO_FP3(FMUL_zpzz, fmul)
4477DO_FP3(FMIN_zpzz, fmin)
4478DO_FP3(FMAX_zpzz, fmax)
4479DO_FP3(FMINNM_zpzz, fminnum)
4480DO_FP3(FMAXNM_zpzz, fmaxnum)
4481DO_FP3(FABD, fabd)
4482DO_FP3(FSCALE, fscalbn)
4483DO_FP3(FDIV, fdiv)
4484DO_FP3(FMULX, fmulx)
4485
4486#undef DO_FP3
8092c6a3 4487
cc48affe
RH
4488typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
4489 TCGv_i64, TCGv_ptr, TCGv_i32);
4490
4491static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
4492 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
4493{
4494 unsigned vsz = vec_full_reg_size(s);
4495 TCGv_ptr t_zd, t_zn, t_pg, status;
4496 TCGv_i32 desc;
4497
4498 t_zd = tcg_temp_new_ptr();
4499 t_zn = tcg_temp_new_ptr();
4500 t_pg = tcg_temp_new_ptr();
4501 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
4502 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
4503 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4504
cdfb22bb 4505 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
cc48affe
RH
4506 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
4507 fn(t_zd, t_zn, t_pg, scalar, status, desc);
4508
4509 tcg_temp_free_i32(desc);
4510 tcg_temp_free_ptr(status);
4511 tcg_temp_free_ptr(t_pg);
4512 tcg_temp_free_ptr(t_zn);
4513 tcg_temp_free_ptr(t_zd);
4514}
4515
4516static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4517 gen_helper_sve_fp2scalar *fn)
4518{
138a1f7b
RH
4519 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4520 tcg_constant_i64(imm), fn);
cc48affe
RH
4521}
4522
4523#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 4524static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
4525{ \
4526 static gen_helper_sve_fp2scalar * const fns[3] = { \
4527 gen_helper_sve_##name##_h, \
4528 gen_helper_sve_##name##_s, \
4529 gen_helper_sve_##name##_d \
4530 }; \
4531 static uint64_t const val[3][2] = { \
4532 { float16_##const0, float16_##const1 }, \
4533 { float32_##const0, float32_##const1 }, \
4534 { float64_##const0, float64_##const1 }, \
4535 }; \
4536 if (a->esz == 0) { \
4537 return false; \
4538 } \
4539 if (sve_access_check(s)) { \
4540 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
4541 } \
4542 return true; \
4543}
4544
cc48affe
RH
4545DO_FP_IMM(FADD, fadds, half, one)
4546DO_FP_IMM(FSUB, fsubs, half, one)
4547DO_FP_IMM(FMUL, fmuls, half, two)
4548DO_FP_IMM(FSUBR, fsubrs, half, one)
4549DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
4550DO_FP_IMM(FMINNM, fminnms, zero, one)
4551DO_FP_IMM(FMAX, fmaxs, zero, one)
4552DO_FP_IMM(FMIN, fmins, zero, one)
4553
4554#undef DO_FP_IMM
4555
abfdefd5
RH
4556static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4557 gen_helper_gvec_4_ptr *fn)
4558{
4559 if (fn == NULL) {
4560 return false;
4561 }
4562 if (sve_access_check(s)) {
4563 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4564 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4565 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4566 vec_full_reg_offset(s, a->rn),
4567 vec_full_reg_offset(s, a->rm),
4568 pred_full_reg_offset(s, a->pg),
4569 status, vsz, vsz, 0, fn);
4570 tcg_temp_free_ptr(status);
4571 }
4572 return true;
4573}
4574
4575#define DO_FPCMP(NAME, name) \
3a7be554 4576static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
4577{ \
4578 static gen_helper_gvec_4_ptr * const fns[4] = { \
4579 NULL, gen_helper_sve_##name##_h, \
4580 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4581 }; \
4582 return do_fp_cmp(s, a, fns[a->esz]); \
4583}
4584
4585DO_FPCMP(FCMGE, fcmge)
4586DO_FPCMP(FCMGT, fcmgt)
4587DO_FPCMP(FCMEQ, fcmeq)
4588DO_FPCMP(FCMNE, fcmne)
4589DO_FPCMP(FCMUO, fcmuo)
4590DO_FPCMP(FACGE, facge)
4591DO_FPCMP(FACGT, facgt)
4592
4593#undef DO_FPCMP
4594
3a7be554 4595static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
4596{
4597 static gen_helper_gvec_4_ptr * const fns[3] = {
4598 gen_helper_sve_fcadd_h,
4599 gen_helper_sve_fcadd_s,
4600 gen_helper_sve_fcadd_d
4601 };
4602
4603 if (a->esz == 0) {
4604 return false;
4605 }
4606 if (sve_access_check(s)) {
4607 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4608 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
4609 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4610 vec_full_reg_offset(s, a->rn),
4611 vec_full_reg_offset(s, a->rm),
4612 pred_full_reg_offset(s, a->pg),
4613 status, vsz, vsz, a->rot, fns[a->esz - 1]);
4614 tcg_temp_free_ptr(status);
4615 }
4616 return true;
4617}
4618
08975da9
RH
4619static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
4620 gen_helper_gvec_5_ptr *fn)
6ceabaad 4621{
08975da9 4622 if (a->esz == 0) {
6ceabaad
RH
4623 return false;
4624 }
08975da9
RH
4625 if (sve_access_check(s)) {
4626 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4627 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4628 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4629 vec_full_reg_offset(s, a->rn),
4630 vec_full_reg_offset(s, a->rm),
4631 vec_full_reg_offset(s, a->ra),
4632 pred_full_reg_offset(s, a->pg),
4633 status, vsz, vsz, 0, fn);
4634 tcg_temp_free_ptr(status);
6ceabaad 4635 }
6ceabaad
RH
4636 return true;
4637}
4638
4639#define DO_FMLA(NAME, name) \
3a7be554 4640static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 4641{ \
08975da9 4642 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
4643 NULL, gen_helper_sve_##name##_h, \
4644 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4645 }; \
4646 return do_fmla(s, a, fns[a->esz]); \
4647}
4648
4649DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4650DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4651DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4652DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4653
4654#undef DO_FMLA
4655
3a7be554 4656static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 4657{
08975da9
RH
4658 static gen_helper_gvec_5_ptr * const fns[4] = {
4659 NULL,
05f48bab
RH
4660 gen_helper_sve_fcmla_zpzzz_h,
4661 gen_helper_sve_fcmla_zpzzz_s,
4662 gen_helper_sve_fcmla_zpzzz_d,
4663 };
4664
4665 if (a->esz == 0) {
4666 return false;
4667 }
4668 if (sve_access_check(s)) {
4669 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4670 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4671 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4672 vec_full_reg_offset(s, a->rn),
4673 vec_full_reg_offset(s, a->rm),
4674 vec_full_reg_offset(s, a->ra),
4675 pred_full_reg_offset(s, a->pg),
4676 status, vsz, vsz, a->rot, fns[a->esz]);
4677 tcg_temp_free_ptr(status);
05f48bab
RH
4678 }
4679 return true;
4680}
4681
3a7be554 4682static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405 4683{
636ddeb1 4684 static gen_helper_gvec_4_ptr * const fns[2] = {
18fc2405
RH
4685 gen_helper_gvec_fcmlah_idx,
4686 gen_helper_gvec_fcmlas_idx,
4687 };
4688
4689 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4690 tcg_debug_assert(a->rd == a->ra);
4691 if (sve_access_check(s)) {
4692 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4693 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
636ddeb1 4694 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
18fc2405
RH
4695 vec_full_reg_offset(s, a->rn),
4696 vec_full_reg_offset(s, a->rm),
636ddeb1 4697 vec_full_reg_offset(s, a->ra),
18fc2405
RH
4698 status, vsz, vsz,
4699 a->index * 4 + a->rot,
4700 fns[a->esz - 1]);
4701 tcg_temp_free_ptr(status);
4702 }
4703 return true;
4704}
4705
8092c6a3
RH
4706/*
4707 *** SVE Floating Point Unary Operations Predicated Group
4708 */
4709
4710static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4711 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4712{
4713 if (sve_access_check(s)) {
4714 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4715 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
4716 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4717 vec_full_reg_offset(s, rn),
4718 pred_full_reg_offset(s, pg),
4719 status, vsz, vsz, 0, fn);
4720 tcg_temp_free_ptr(status);
4721 }
4722 return true;
4723}
4724
3a7be554 4725static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4726{
e4ab5124 4727 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
4728}
4729
3a7be554 4730static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4731{
4732 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4733}
4734
d29b17ca
RH
4735static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
4736{
4737 if (!dc_isar_feature(aa64_sve_bf16, s)) {
4738 return false;
4739 }
4740 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
4741}
4742
3a7be554 4743static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4744{
e4ab5124 4745 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
4746}
4747
3a7be554 4748static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4749{
4750 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4751}
4752
3a7be554 4753static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4754{
4755 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4756}
4757
3a7be554 4758static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4759{
4760 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4761}
4762
3a7be554 4763static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4764{
4765 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4766}
4767
3a7be554 4768static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4769{
4770 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4771}
4772
3a7be554 4773static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4774{
4775 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4776}
4777
3a7be554 4778static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4779{
4780 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4781}
4782
3a7be554 4783static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4784{
4785 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4786}
4787
3a7be554 4788static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4789{
4790 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4791}
4792
3a7be554 4793static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4794{
4795 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4796}
4797
3a7be554 4798static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4799{
4800 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4801}
4802
3a7be554 4803static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4804{
4805 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4806}
4807
3a7be554 4808static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4809{
4810 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4811}
4812
3a7be554 4813static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4814{
4815 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4816}
4817
3a7be554 4818static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4819{
4820 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4821}
4822
3a7be554 4823static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4824{
4825 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4826}
4827
3a7be554 4828static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4829{
4830 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4831}
4832
cda3c753
RH
4833static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4834 gen_helper_sve_frint_h,
4835 gen_helper_sve_frint_s,
4836 gen_helper_sve_frint_d
4837};
4838
3a7be554 4839static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4840{
4841 if (a->esz == 0) {
4842 return false;
4843 }
4844 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4845 frint_fns[a->esz - 1]);
4846}
4847
3a7be554 4848static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4849{
4850 static gen_helper_gvec_3_ptr * const fns[3] = {
4851 gen_helper_sve_frintx_h,
4852 gen_helper_sve_frintx_s,
4853 gen_helper_sve_frintx_d
4854 };
4855 if (a->esz == 0) {
4856 return false;
4857 }
4858 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4859}
4860
95365277
SL
4861static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4862 int mode, gen_helper_gvec_3_ptr *fn)
cda3c753 4863{
cda3c753
RH
4864 if (sve_access_check(s)) {
4865 unsigned vsz = vec_full_reg_size(s);
4866 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4867 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4868
4869 gen_helper_set_rmode(tmode, tmode, status);
4870
4871 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4872 vec_full_reg_offset(s, a->rn),
4873 pred_full_reg_offset(s, a->pg),
95365277 4874 status, vsz, vsz, 0, fn);
cda3c753
RH
4875
4876 gen_helper_set_rmode(tmode, tmode, status);
4877 tcg_temp_free_i32(tmode);
4878 tcg_temp_free_ptr(status);
4879 }
4880 return true;
4881}
4882
3a7be554 4883static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753 4884{
95365277
SL
4885 if (a->esz == 0) {
4886 return false;
4887 }
4888 return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
cda3c753
RH
4889}
4890
3a7be554 4891static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753 4892{
95365277
SL
4893 if (a->esz == 0) {
4894 return false;
4895 }
4896 return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
cda3c753
RH
4897}
4898
3a7be554 4899static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753 4900{
95365277
SL
4901 if (a->esz == 0) {
4902 return false;
4903 }
4904 return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
cda3c753
RH
4905}
4906
3a7be554 4907static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753 4908{
95365277
SL
4909 if (a->esz == 0) {
4910 return false;
4911 }
4912 return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
cda3c753
RH
4913}
4914
3a7be554 4915static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753 4916{
95365277
SL
4917 if (a->esz == 0) {
4918 return false;
4919 }
4920 return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
cda3c753
RH
4921}
4922
3a7be554 4923static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4924{
4925 static gen_helper_gvec_3_ptr * const fns[3] = {
4926 gen_helper_sve_frecpx_h,
4927 gen_helper_sve_frecpx_s,
4928 gen_helper_sve_frecpx_d
4929 };
4930 if (a->esz == 0) {
4931 return false;
4932 }
4933 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4934}
4935
3a7be554 4936static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4937{
4938 static gen_helper_gvec_3_ptr * const fns[3] = {
4939 gen_helper_sve_fsqrt_h,
4940 gen_helper_sve_fsqrt_s,
4941 gen_helper_sve_fsqrt_d
4942 };
4943 if (a->esz == 0) {
4944 return false;
4945 }
4946 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4947}
4948
3a7be554 4949static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4950{
4951 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4952}
4953
3a7be554 4954static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4955{
4956 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4957}
4958
3a7be554 4959static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4960{
4961 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4962}
4963
3a7be554 4964static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4965{
4966 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4967}
4968
3a7be554 4969static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4970{
4971 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4972}
4973
3a7be554 4974static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4975{
4976 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4977}
4978
3a7be554 4979static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4980{
4981 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4982}
4983
3a7be554 4984static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4985{
4986 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4987}
4988
3a7be554 4989static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4990{
4991 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4992}
4993
3a7be554 4994static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4995{
4996 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4997}
4998
3a7be554 4999static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5000{
5001 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
5002}
5003
3a7be554 5004static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5005{
5006 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
5007}
5008
3a7be554 5009static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5010{
5011 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
5012}
5013
3a7be554 5014static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
5015{
5016 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
5017}
5018
d1822297
RH
5019/*
5020 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
5021 */
5022
5023/* Subroutine loading a vector register at VOFS of LEN bytes.
5024 * The load should begin at the address Rn + IMM.
5025 */
5026
19f2acc9 5027static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 5028{
19f2acc9
RH
5029 int len_align = QEMU_ALIGN_DOWN(len, 8);
5030 int len_remain = len % 8;
5031 int nparts = len / 8 + ctpop8(len_remain);
d1822297 5032 int midx = get_mem_index(s);
b2aa8879 5033 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 5034
b2aa8879
RH
5035 dirty_addr = tcg_temp_new_i64();
5036 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 5037 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
b2aa8879 5038 tcg_temp_free_i64(dirty_addr);
d1822297 5039
b2aa8879
RH
5040 /*
5041 * Note that unpredicated load/store of vector/predicate registers
d1822297 5042 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 5043 * operations on larger quantities.
d1822297
RH
5044 * Attempt to keep code expansion to a minimum by limiting the
5045 * amount of unrolling done.
5046 */
5047 if (nparts <= 4) {
5048 int i;
5049
b2aa8879 5050 t0 = tcg_temp_new_i64();
d1822297 5051 for (i = 0; i < len_align; i += 8) {
fc313c64 5052 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
d1822297 5053 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 5054 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 5055 }
b2aa8879 5056 tcg_temp_free_i64(t0);
d1822297
RH
5057 } else {
5058 TCGLabel *loop = gen_new_label();
5059 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5060
b2aa8879
RH
5061 /* Copy the clean address into a local temp, live across the loop. */
5062 t0 = clean_addr;
4b4dc975 5063 clean_addr = new_tmp_a64_local(s);
b2aa8879 5064 tcg_gen_mov_i64(clean_addr, t0);
d1822297 5065
b2aa8879 5066 gen_set_label(loop);
d1822297 5067
b2aa8879 5068 t0 = tcg_temp_new_i64();
fc313c64 5069 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
b2aa8879 5070 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 5071
b2aa8879 5072 tp = tcg_temp_new_ptr();
d1822297
RH
5073 tcg_gen_add_ptr(tp, cpu_env, i);
5074 tcg_gen_addi_ptr(i, i, 8);
5075 tcg_gen_st_i64(t0, tp, vofs);
5076 tcg_temp_free_ptr(tp);
b2aa8879 5077 tcg_temp_free_i64(t0);
d1822297
RH
5078
5079 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
5080 tcg_temp_free_ptr(i);
5081 }
5082
b2aa8879
RH
5083 /*
5084 * Predicate register loads can be any multiple of 2.
d1822297
RH
5085 * Note that we still store the entire 64-bit unit into cpu_env.
5086 */
5087 if (len_remain) {
b2aa8879 5088 t0 = tcg_temp_new_i64();
d1822297
RH
5089 switch (len_remain) {
5090 case 2:
5091 case 4:
5092 case 8:
b2aa8879
RH
5093 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
5094 MO_LE | ctz32(len_remain));
d1822297
RH
5095 break;
5096
5097 case 6:
5098 t1 = tcg_temp_new_i64();
b2aa8879
RH
5099 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
5100 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5101 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
5102 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
5103 tcg_temp_free_i64(t1);
5104 break;
5105
5106 default:
5107 g_assert_not_reached();
5108 }
5109 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 5110 tcg_temp_free_i64(t0);
d1822297 5111 }
d1822297
RH
5112}
5113
5047c204 5114/* Similarly for stores. */
19f2acc9 5115static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 5116{
19f2acc9
RH
5117 int len_align = QEMU_ALIGN_DOWN(len, 8);
5118 int len_remain = len % 8;
5119 int nparts = len / 8 + ctpop8(len_remain);
5047c204 5120 int midx = get_mem_index(s);
bba87d0a 5121 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 5122
bba87d0a
RH
5123 dirty_addr = tcg_temp_new_i64();
5124 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 5125 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
bba87d0a 5126 tcg_temp_free_i64(dirty_addr);
5047c204
RH
5127
5128 /* Note that unpredicated load/store of vector/predicate registers
5129 * are defined as a stream of bytes, which equates to little-endian
5130 * operations on larger quantities. There is no nice way to force
5131 * a little-endian store for aarch64_be-linux-user out of line.
5132 *
5133 * Attempt to keep code expansion to a minimum by limiting the
5134 * amount of unrolling done.
5135 */
5136 if (nparts <= 4) {
5137 int i;
5138
bba87d0a 5139 t0 = tcg_temp_new_i64();
5047c204
RH
5140 for (i = 0; i < len_align; i += 8) {
5141 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
fc313c64 5142 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
d8227b09 5143 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 5144 }
bba87d0a 5145 tcg_temp_free_i64(t0);
5047c204
RH
5146 } else {
5147 TCGLabel *loop = gen_new_label();
bba87d0a 5148 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 5149
bba87d0a
RH
5150 /* Copy the clean address into a local temp, live across the loop. */
5151 t0 = clean_addr;
4b4dc975 5152 clean_addr = new_tmp_a64_local(s);
bba87d0a 5153 tcg_gen_mov_i64(clean_addr, t0);
5047c204 5154
bba87d0a 5155 gen_set_label(loop);
5047c204 5156
bba87d0a
RH
5157 t0 = tcg_temp_new_i64();
5158 tp = tcg_temp_new_ptr();
5159 tcg_gen_add_ptr(tp, cpu_env, i);
5160 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 5161 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
5162 tcg_temp_free_ptr(tp);
5163
fc313c64 5164 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
bba87d0a
RH
5165 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5166 tcg_temp_free_i64(t0);
5047c204
RH
5167
5168 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
5169 tcg_temp_free_ptr(i);
5170 }
5171
5172 /* Predicate register stores can be any multiple of 2. */
5173 if (len_remain) {
bba87d0a 5174 t0 = tcg_temp_new_i64();
5047c204 5175 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
5176
5177 switch (len_remain) {
5178 case 2:
5179 case 4:
5180 case 8:
bba87d0a
RH
5181 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
5182 MO_LE | ctz32(len_remain));
5047c204
RH
5183 break;
5184
5185 case 6:
bba87d0a
RH
5186 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
5187 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 5188 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 5189 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
5190 break;
5191
5192 default:
5193 g_assert_not_reached();
5194 }
bba87d0a 5195 tcg_temp_free_i64(t0);
5047c204 5196 }
5047c204
RH
5197}
5198
3a7be554 5199static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
5200{
5201 if (sve_access_check(s)) {
5202 int size = vec_full_reg_size(s);
5203 int off = vec_full_reg_offset(s, a->rd);
5204 do_ldr(s, off, size, a->rn, a->imm * size);
5205 }
5206 return true;
5207}
5208
3a7be554 5209static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
5210{
5211 if (sve_access_check(s)) {
5212 int size = pred_full_reg_size(s);
5213 int off = pred_full_reg_offset(s, a->rd);
5214 do_ldr(s, off, size, a->rn, a->imm * size);
5215 }
5216 return true;
5217}
c4e7c493 5218
3a7be554 5219static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
5220{
5221 if (sve_access_check(s)) {
5222 int size = vec_full_reg_size(s);
5223 int off = vec_full_reg_offset(s, a->rd);
5224 do_str(s, off, size, a->rn, a->imm * size);
5225 }
5226 return true;
5227}
5228
3a7be554 5229static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
5230{
5231 if (sve_access_check(s)) {
5232 int size = pred_full_reg_size(s);
5233 int off = pred_full_reg_offset(s, a->rd);
5234 do_str(s, off, size, a->rn, a->imm * size);
5235 }
5236 return true;
5237}
5238
c4e7c493
RH
5239/*
5240 *** SVE Memory - Contiguous Load Group
5241 */
5242
5243/* The memory mode of the dtype. */
14776ab5 5244static const MemOp dtype_mop[16] = {
c4e7c493
RH
5245 MO_UB, MO_UB, MO_UB, MO_UB,
5246 MO_SL, MO_UW, MO_UW, MO_UW,
5247 MO_SW, MO_SW, MO_UL, MO_UL,
fc313c64 5248 MO_SB, MO_SB, MO_SB, MO_UQ
c4e7c493
RH
5249};
5250
5251#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
5252
5253/* The vector element size of dtype. */
5254static const uint8_t dtype_esz[16] = {
5255 0, 1, 2, 3,
5256 3, 1, 2, 3,
5257 3, 2, 2, 3,
5258 3, 2, 1, 3
5259};
5260
5261static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
5262 int dtype, uint32_t mte_n, bool is_write,
5263 gen_helper_gvec_mem *fn)
c4e7c493
RH
5264{
5265 unsigned vsz = vec_full_reg_size(s);
5266 TCGv_ptr t_pg;
500d0484 5267 TCGv_i32 t_desc;
206adacf 5268 int desc = 0;
c4e7c493 5269
206adacf
RH
5270 /*
5271 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
5272 * registers as pointers, so encode the regno into the data field.
5273 * For consistency, do this even for LD1.
5274 */
9473d0ec 5275 if (s->mte_active[0]) {
206adacf
RH
5276 int msz = dtype_msz(dtype);
5277
5278 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5279 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5280 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5281 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5282 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
206adacf 5283 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
5284 } else {
5285 addr = clean_data_tbi(s, addr);
206adacf 5286 }
9473d0ec 5287
206adacf 5288 desc = simd_desc(vsz, vsz, zt | desc);
500d0484 5289 t_desc = tcg_const_i32(desc);
c4e7c493
RH
5290 t_pg = tcg_temp_new_ptr();
5291
5292 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
500d0484 5293 fn(cpu_env, t_pg, addr, t_desc);
c4e7c493
RH
5294
5295 tcg_temp_free_ptr(t_pg);
500d0484 5296 tcg_temp_free_i32(t_desc);
c4e7c493
RH
5297}
5298
c182c6db
RH
/*
 * Helper dispatch table for contiguous predicated loads.
 * Indexed by [mte][be][dtype][nreg].
 * NULL entries are dtype/nreg combinations with no instruction encoding
 * (multi-register forms exist only for the non-extending dtypes).
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
5417
c4e7c493
RH
5418static void do_ld_zpa(DisasContext *s, int zt, int pg,
5419 TCGv_i64 addr, int dtype, int nreg)
5420{
206adacf 5421 gen_helper_gvec_mem *fn
c182c6db 5422 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 5423
206adacf
RH
5424 /*
5425 * While there are holes in the table, they are not
c4e7c493
RH
5426 * accessible via the instruction encoding.
5427 */
5428 assert(fn != NULL);
206adacf 5429 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
5430}
5431
3a7be554 5432static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
5433{
5434 if (a->rm == 31) {
5435 return false;
5436 }
5437 if (sve_access_check(s)) {
5438 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5439 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
5440 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5441 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5442 }
5443 return true;
5444}
5445
3a7be554 5446static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
5447{
5448 if (sve_access_check(s)) {
5449 int vsz = vec_full_reg_size(s);
5450 int elements = vsz >> dtype_esz[a->dtype];
5451 TCGv_i64 addr = new_tmp_a64(s);
5452
5453 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5454 (a->imm * elements * (a->nreg + 1))
5455 << dtype_msz(a->dtype));
5456 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5457 }
5458 return true;
5459}
e2654d75 5460
/* LDFF1 (first-fault contiguous load, scalar plus scalar). */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    /* Indexed by [mte][be][dtype]. */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        /* addr = rn + (rm << msz) */
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
5558
/* LDNF1 (non-faulting contiguous load, scalar plus immediate). */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    /* Indexed by [mte][be][dtype]. */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
1a039c7e 5659
/*
 * LD1RQ: load one 128-bit quadword under predicate control and
 * replicate it to fill the whole vector register.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers. */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /*
         * On big-endian hosts the low 16 bits of the 8-byte predicate
         * slot live at byte offset 6 — presumably; confirmed by the
         * matching +4 adjustment for 32 bits in do_ldro below.
         */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        /* Stash the truncated predicate in the scratch slot. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* Invoke the LD1 helper with the vector length forced to 16 bytes. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    tcg_temp_free_ptr(t_pg);

    /* Replicate that first quadword. */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}
5701
3a7be554 5702static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5703{
5704 if (a->rm == 31) {
5705 return false;
5706 }
5707 if (sve_access_check(s)) {
5708 int msz = dtype_msz(a->dtype);
5709 TCGv_i64 addr = new_tmp_a64(s);
5710 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5711 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5712 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5713 }
5714 return true;
5715}
5716
3a7be554 5717static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5718{
5719 if (sve_access_check(s)) {
5720 TCGv_i64 addr = new_tmp_a64(s);
5721 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5722 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5723 }
5724 return true;
5725}
5726
12c563f6
RH
/*
 * LD1RO: load one 256-bit octaword under predicate control and
 * replicate it in 32-byte units to fill the vector register.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers. */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /* Low 32 bits of the 8-byte predicate slot are at byte offset 4. */
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        /* Stash the truncated predicate in the scratch slot. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* Invoke the LD1 helper with the vector length forced to 32 bytes. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    vsz -= vsz_r32;
    if (vsz) {
        /* Zero the trailing sub-octaword remainder, if any. */
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}
5789
5790static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5791{
5792 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5793 return false;
5794 }
5795 if (a->rm == 31) {
5796 return false;
5797 }
5798 if (sve_access_check(s)) {
5799 TCGv_i64 addr = new_tmp_a64(s);
5800 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5801 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5802 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5803 }
5804 return true;
5805}
5806
5807static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5808{
5809 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5810 return false;
5811 }
5812 if (sve_access_check(s)) {
5813 TCGv_i64 addr = new_tmp_a64(s);
5814 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5815 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5816 }
5817 return true;
5818}
5819
/*
 * Load and broadcast element (LD1R): load one element and replicate
 * it to every active element of the destination; if no predicate bit
 * is active, no memory access occurs at all.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Branch target that skips the load when the predicate is empty. */
    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Predicate wider than 64 bits: locate the last active element;
         * a negative result means none are active.
         */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
5871
1a039c7e
RH
/*
 * Dispatch a contiguous predicated store (ST1..ST4) to the proper helper.
 * For ST1, msz (memory size) and esz (element size) may differ (truncating
 * stores); for ST2-ST4, msz == esz is enforced by the encoding.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* Indexed by [mte][be][msz][esz]; NULL where esz < msz is invalid. */
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    /* Indexed by [mte][be][nreg - 1][msz]. */
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
5995
3a7be554 5996static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5997{
5998 if (a->rm == 31 || a->msz > a->esz) {
5999 return false;
6000 }
6001 if (sve_access_check(s)) {
6002 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 6003 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
6004 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
6005 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6006 }
6007 return true;
6008}
6009
3a7be554 6010static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
6011{
6012 if (a->msz > a->esz) {
6013 return false;
6014 }
6015 if (sve_access_check(s)) {
6016 int vsz = vec_full_reg_size(s);
6017 int elements = vsz >> a->esz;
6018 TCGv_i64 addr = new_tmp_a64(s);
6019
6020 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
6021 (a->imm * elements * (a->nreg + 1)) << a->msz);
6022 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6023 }
6024 return true;
6025}
f6dbf62a
RH
6026
6027/*
6028 *** SVE gather loads / scatter stores
6029 */
6030
500d0484 6031static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 6032 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 6033 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
6034{
6035 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
6036 TCGv_ptr t_zm = tcg_temp_new_ptr();
6037 TCGv_ptr t_pg = tcg_temp_new_ptr();
6038 TCGv_ptr t_zt = tcg_temp_new_ptr();
500d0484 6039 TCGv_i32 t_desc;
d28d12f0 6040 int desc = 0;
500d0484 6041
d28d12f0
RH
6042 if (s->mte_active[0]) {
6043 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
6044 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
6045 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
6046 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 6047 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
6048 desc <<= SVE_MTEDESC_SHIFT;
6049 }
cdecb3fc 6050 desc = simd_desc(vsz, vsz, desc | scale);
500d0484 6051 t_desc = tcg_const_i32(desc);
f6dbf62a
RH
6052
6053 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
6054 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
6055 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
500d0484 6056 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
f6dbf62a
RH
6057
6058 tcg_temp_free_ptr(t_zt);
6059 tcg_temp_free_ptr(t_zm);
6060 tcg_temp_free_ptr(t_pg);
500d0484 6061 tcg_temp_free_i32(t_desc);
f6dbf62a
RH
6062}
6063
d28d12f0
RH
/*
 * Helper dispatch table for 32-bit gather loads.
 * Indexed by [mte][be][ff][xs][u][msz]; NULL entries are msz/extension
 * combinations with no instruction encoding.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
6180
/* Note that we overload xs=2 to indicate 64-bit offset.  */
/* Indexed by [mte][be][ff][xs][u][msz]; NULL marks invalid combinations
 * (a signed load the same size as the element has no meaning).  */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
6391
3a7be554 6392static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
6393{
6394 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6395 bool be = s->be_data == MO_BE;
6396 bool mte = s->mte_active[0];
673e9fa6
RH
6397
6398 if (!sve_access_check(s)) {
6399 return true;
6400 }
6401
6402 switch (a->esz) {
6403 case MO_32:
d28d12f0 6404 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6405 break;
6406 case MO_64:
d28d12f0 6407 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
6408 break;
6409 }
6410 assert(fn != NULL);
6411
6412 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6413 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
6414 return true;
6415}
6416
3a7be554 6417static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
6418{
6419 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6420 bool be = s->be_data == MO_BE;
6421 bool mte = s->mte_active[0];
673e9fa6
RH
6422
6423 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
6424 return false;
6425 }
6426 if (!sve_access_check(s)) {
6427 return true;
6428 }
6429
6430 switch (a->esz) {
6431 case MO_32:
d28d12f0 6432 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
6433 break;
6434 case MO_64:
d28d12f0 6435 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
6436 break;
6437 }
6438 assert(fn != NULL);
6439
6440 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
6441 * by loading the immediate into the scalar parameter.
6442 */
2ccdf94f
RH
6443 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6444 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
6445 return true;
6446}
6447
cf327449
SL
6448static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
6449{
b17ab470
RH
6450 gen_helper_gvec_mem_scatter *fn = NULL;
6451 bool be = s->be_data == MO_BE;
6452 bool mte = s->mte_active[0];
6453
6454 if (a->esz < a->msz + !a->u) {
6455 return false;
6456 }
cf327449
SL
6457 if (!dc_isar_feature(aa64_sve2, s)) {
6458 return false;
6459 }
b17ab470
RH
6460 if (!sve_access_check(s)) {
6461 return true;
6462 }
6463
6464 switch (a->esz) {
6465 case MO_32:
6466 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
6467 break;
6468 case MO_64:
6469 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
6470 break;
6471 }
6472 assert(fn != NULL);
6473
6474 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6475 cpu_reg(s, a->rm), a->msz, false, fn);
6476 return true;
cf327449
SL
6477}
6478
d28d12f0
RH
/* Indexed by [mte][be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
6512
/* Note that we overload xs=2 to indicate 64-bit offset.  */
/* Indexed by [mte][be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
6570
3a7be554 6571static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 6572{
f6dbf62a 6573 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
6574 bool be = s->be_data == MO_BE;
6575 bool mte = s->mte_active[0];
f6dbf62a
RH
6576
6577 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6578 return false;
6579 }
6580 if (!sve_access_check(s)) {
6581 return true;
6582 }
6583 switch (a->esz) {
6584 case MO_32:
d28d12f0 6585 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
6586 break;
6587 case MO_64:
d28d12f0 6588 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
6589 break;
6590 default:
6591 g_assert_not_reached();
6592 }
6593 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6594 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
6595 return true;
6596}
dec6cf6b 6597
3a7be554 6598static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
6599{
6600 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6601 bool be = s->be_data == MO_BE;
6602 bool mte = s->mte_active[0];
408ecde9
RH
6603
6604 if (a->esz < a->msz) {
6605 return false;
6606 }
6607 if (!sve_access_check(s)) {
6608 return true;
6609 }
6610
6611 switch (a->esz) {
6612 case MO_32:
d28d12f0 6613 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
6614 break;
6615 case MO_64:
d28d12f0 6616 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
6617 break;
6618 }
6619 assert(fn != NULL);
6620
6621 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6622 * by loading the immediate into the scalar parameter.
6623 */
2ccdf94f
RH
6624 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6625 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
6626 return true;
6627}
6628
6ebca45f
SL
6629static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
6630{
b17ab470
RH
6631 gen_helper_gvec_mem_scatter *fn;
6632 bool be = s->be_data == MO_BE;
6633 bool mte = s->mte_active[0];
6634
6635 if (a->esz < a->msz) {
6636 return false;
6637 }
6ebca45f
SL
6638 if (!dc_isar_feature(aa64_sve2, s)) {
6639 return false;
6640 }
b17ab470
RH
6641 if (!sve_access_check(s)) {
6642 return true;
6643 }
6644
6645 switch (a->esz) {
6646 case MO_32:
6647 fn = scatter_store_fn32[mte][be][0][a->msz];
6648 break;
6649 case MO_64:
6650 fn = scatter_store_fn64[mte][be][2][a->msz];
6651 break;
6652 default:
6653 g_assert_not_reached();
6654 }
6655
6656 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6657 cpu_reg(s, a->rm), a->msz, true, fn);
6658 return true;
6ebca45f
SL
6659}
6660
dec6cf6b
RH
6661/*
6662 * Prefetches
6663 */
6664
/*
 * PRF (prefetch, vector or immediate forms).  QEMU does not model the
 * cache, so only the SVE access check (for its side effects) is done.
 */
static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}
6671
/*
 * PRF (prefetch, scalar-plus-scalar form).  rm == 31 (XZR) is not a
 * valid encoding for this form; otherwise a nop as above.
 */
static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}
a2103582
RH
6681
6682/*
6683 * Move Prefix
6684 *
6685 * TODO: The implementation so far could handle predicated merging movprfx.
6686 * The helper functions as written take an extra source register to
6687 * use in the operation, but the result is only written when predication
6688 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
6689 * to allow the final write back to the destination to be unconditional.
6690 * For predicated zeroing movprfx, we need to rearrange the helpers to
6691 * allow the final write back to zero inactives.
6692 *
6693 * In the meantime, just emit the moves.
6694 */
6695
/* MOVPRFX (unpredicated): emitted as a plain vector move; see note above. */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}
6700
3a7be554 6701static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
6702{
6703 if (sve_access_check(s)) {
6704 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
6705 }
6706 return true;
6707}
6708
/* MOVPRFX (predicated, zeroing): move rn, zeroing inactive elements.  */
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
5dad1ba5
RH
6713
6714/*
6715 * SVE2 Integer Multiply - Unpredicated
6716 */
6717
6718static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
6719{
6720 if (!dc_isar_feature(aa64_sve2, s)) {
6721 return false;
6722 }
6723 if (sve_access_check(s)) {
6724 gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
6725 }
6726 return true;
6727}
6728
6729static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a,
6730 gen_helper_gvec_3 *fn)
6731{
6732 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6733 return false;
6734 }
6735 if (sve_access_check(s)) {
6736 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
6737 }
6738 return true;
6739}
6740
/* SMULH (unpredicated, SVE2): signed multiply returning the high half.  */
static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
        gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}
6749
/* UMULH (unpredicated, SVE2): unsigned multiply returning the high half. */
static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
        gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}
6758
/* PMUL (unpredicated, SVE2): polynomial multiply, byte elements only.  */
static bool trans_PMUL_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b);
}
d4b1e59d 6763
169d7c58
RH
/* SQDMULH (unpredicated, SVE2): signed saturating doubling multiply high. */
static bool trans_SQDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
        gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}
6772
/* SQRDMULH (unpredicated, SVE2): as SQDMULH but with rounding.  */
static bool trans_SQRDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
        gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}
6781
d4b1e59d
RH
6782/*
6783 * SVE2 Integer - Predicated
6784 */
6785
6786static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
6787 gen_helper_gvec_4 *fn)
6788{
6789 if (!dc_isar_feature(aa64_sve2, s)) {
6790 return false;
6791 }
6792 return do_zpzz_ool(s, a, fn);
6793}
6794
6795static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6796{
6797 static gen_helper_gvec_4 * const fns[3] = {
6798 gen_helper_sve2_sadalp_zpzz_h,
6799 gen_helper_sve2_sadalp_zpzz_s,
6800 gen_helper_sve2_sadalp_zpzz_d,
6801 };
6802 if (a->esz == 0) {
6803 return false;
6804 }
6805 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6806}
6807
6808static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6809{
6810 static gen_helper_gvec_4 * const fns[3] = {
6811 gen_helper_sve2_uadalp_zpzz_h,
6812 gen_helper_sve2_uadalp_zpzz_s,
6813 gen_helper_sve2_uadalp_zpzz_d,
6814 };
6815 if (a->esz == 0) {
6816 return false;
6817 }
6818 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6819}
db366da8
RH
6820
6821/*
6822 * SVE2 integer unary operations (predicated)
6823 */
6824
/*
 * Expand an SVE2 predicated unary operation: gate on the SVE2 feature,
 * then defer to the common SVE zpz expander.
 */
static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a,
                            gen_helper_gvec_3 *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ool(s, a, fn);
}
6833
6834static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a)
6835{
6836 if (a->esz != 2) {
6837 return false;
6838 }
6839 return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s);
6840}
6841
6842static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a)
6843{
6844 if (a->esz != 2) {
6845 return false;
6846 }
6847 return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s);
6848}
6849
/* SQABS: signed saturating absolute value (predicated).  */
static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
        gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
    };
    return do_sve2_zpz_ool(s, a, fns[a->esz]);
}
6858
/* SQNEG: signed saturating negate (predicated).  */
static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
        gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
    };
    return do_sve2_zpz_ool(s, a, fns[a->esz]);
}
45d9503d
RH
6867
/*
 * Expand an SVE2 predicated two-source operation via the size-indexed
 * table of out-of-line helpers (all four element sizes are valid).
 */
#define DO_SVE2_ZPZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4 * const fns[4] = { \
        gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
    }; \
    return do_sve2_zpzz_ool(s, a, fns[a->esz]); \
}

/* Saturating/rounding shifts.  */
DO_SVE2_ZPZZ(SQSHL, sqshl)
DO_SVE2_ZPZZ(SQRSHL, sqrshl)
DO_SVE2_ZPZZ(SRSHL, srshl)

DO_SVE2_ZPZZ(UQSHL, uqshl)
DO_SVE2_ZPZZ(UQRSHL, uqrshl)
DO_SVE2_ZPZZ(URSHL, urshl)

/* Halving add/sub.  */
DO_SVE2_ZPZZ(SHADD, shadd)
DO_SVE2_ZPZZ(SRHADD, srhadd)
DO_SVE2_ZPZZ(SHSUB, shsub)

DO_SVE2_ZPZZ(UHADD, uhadd)
DO_SVE2_ZPZZ(URHADD, urhadd)
DO_SVE2_ZPZZ(UHSUB, uhsub)

/* Pairwise operations.  */
DO_SVE2_ZPZZ(ADDP, addp)
DO_SVE2_ZPZZ(SMAXP, smaxp)
DO_SVE2_ZPZZ(UMAXP, umaxp)
DO_SVE2_ZPZZ(SMINP, sminp)
DO_SVE2_ZPZZ(UMINP, uminp)

/* Saturating add/sub.  */
DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
DO_SVE2_ZPZZ(SUQADD, suqadd)
DO_SVE2_ZPZZ(USQADD, usqadd)
0ce1dda8
RH
6906
6907/*
6908 * SVE2 Widening Integer Arithmetic
6909 */
6910
6911static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a,
6912 gen_helper_gvec_3 *fn, int data)
6913{
6914 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6915 return false;
6916 }
6917 if (sve_access_check(s)) {
6918 unsigned vsz = vec_full_reg_size(s);
6919 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
6920 vec_full_reg_offset(s, a->rn),
6921 vec_full_reg_offset(s, a->rm),
6922 vsz, vsz, data, fn);
6923 }
6924 return true;
6925}
6926
/*
 * Expand an SVE2 widening operation selecting top/bottom halves of each
 * input independently (SEL1 for rn, SEL2 for rm).  No byte form: the
 * result element is twice the source width.
 */
#define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
{ \
    static gen_helper_gvec_3 * const fns[4] = { \
        NULL, gen_helper_sve2_##name##_h, \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
    }; \
    return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1); \
}

DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false)
DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false)
DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false)

DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false)
DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false)
DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false)

DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true)
DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true)
DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true)

DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true)
DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true)
DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true)

/* Mixed bottom/top forms.  */
DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true)
DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true)
DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false)

DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false)
DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true)

DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false)
DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true)

DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false)
DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true)
6965
2df3ca55
RH
/*
 * EORBT/EORTB: interleaving exclusive-or.  The two selector bits passed
 * to the helper are always complementary: (!sel1 << 1) | sel1 yields
 * data = 2 for EORBT (sel1 = false) and data = 1 for EORTB.
 */
static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
        gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
    };
    return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1);
}

static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a)
{
    return do_eor_tb(s, a, false);
}

static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a)
{
    return do_eor_tb(s, a, true);
}
6984
e3a56131
RH
/*
 * PMULLB/PMULLT: polynomial multiply long.  esz == 0 produces a 128-bit
 * result and requires the optional SVE2 PMULL128 feature; esz == 2 has
 * no helper (NULL) and is rejected by do_sve2_zzw_ool.
 */
static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL, gen_helper_sve2_pmull_d,
    };
    if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], sel);
}

static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a)
{
    return do_trans_pmull(s, a, false);
}

static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a)
{
    return do_trans_pmull(s, a, true);
}
7006
81fccf09
RH
/*
 * Expand an SVE2 widening operation with a wide first operand; only the
 * second operand selects a top/bottom half (SEL2).  No byte form.
 */
#define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
{ \
    static gen_helper_gvec_3 * const fns[4] = { \
        NULL, gen_helper_sve2_##name##_h, \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
    }; \
    return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2); \
}

DO_SVE2_ZZZ_WTB(SADDWB, saddw, false)
DO_SVE2_ZZZ_WTB(SADDWT, saddw, true)
DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false)
DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true)

DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false)
DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true)
DO_SVE2_ZZZ_WTB(USUBWB, usubw, false)
DO_SVE2_ZZZ_WTB(USUBWT, usubw, true)
4269fef1
RH
7026
/*
 * Vector expansion for SSHLL[BT]: sign-extend the bottom (even) or top
 * (odd) half-width elements and shift left.  'imm' packs (shl << 1) | top;
 * halfbits is half the destination element width.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            /*
             * The top half ends up exactly where it already is, with
             * zeros below: a mask of the high half suffices.
             */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Arithmetic shift down extends the top half, then shift up. */
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        /* Bottom half: shift up to the top, then sign-extend back down. */
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}
7048
/*
 * 64-bit-lane expansion for USHLL[BT]: zero-extend the bottom or top
 * half-width elements and shift left.  Because the extension is zero,
 * this reduces to a single shift plus a per-element mask.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    /* Mask of the low half, moved to the result position, per element.  */
    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    /*
     * Net shift: the top half must first come down by halfbits; a
     * negative result means a right shift overall.
     */
    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}
7069
/* Size-specific wrappers matching the GVecGen2i .fni8 signature.  */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
7084
/*
 * Vector expansion for USHLL[BT]: zero-extend the bottom (even) or top
 * (odd) half-width elements and shift left.  'imm' packs (shl << 1) | top.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            /* Top half stays in place with zeros below: just mask.  */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Logical shift down zero-extends the top half, then shift up. */
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            /* No shift: zero-extension of the bottom half is a mask.  */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Shift up to the top, then zero-extend back down.  */
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
7113
/*
 * Common expansion for SSHLL[BT]/USHLL[BT].  ops[uns][esz] selects the
 * signed or unsigned expander; the immediate passed to the expander
 * packs (shift << 1) | sel.  esz here indexes the *destination* sizes
 * h/s/d (vece MO_16..MO_64); byte destinations do not exist.
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}
7165
/* Shift-left-long wrappers: B = bottom half, T = top half; U = unsigned. */
static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}

static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}

static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}

static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
cb9c33b8
RH
7185
/* BEXT/BDEP/BGRP: SVE2 bit-permute operations, gated on the optional
 * SVE2 BitPerm feature rather than base SVE2 alone.  */
static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
        gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}

static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
        gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}

static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
        gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}
ed4a6387
RH
7221
/*
 * CADD/SQCADD: complex integer add with rotate.  'sq' selects the
 * saturating form; 'rot' (false = #90, true = #270 per the wrappers
 * below) is forwarded to the helper as its data argument.
 */
static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot)
{
    static gen_helper_gvec_3 * const fns[2][4] = {
        { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
          gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d },
        { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
          gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d },
    };
    return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot);
}

static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, false);
}

static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, true);
}

static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, false);
}

static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, true);
}
38650638
RH
7252
/*
 * Expand an SVE2 out-of-line helper over four full vector registers
 * (rd, rn, rm, ra) with an immediate 'data' value.  A NULL fn marks
 * an element size that the instruction does not support.
 */
static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
                             gen_helper_gvec_4 *fn, int data)
{
    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
    }
    return true;
}
7264
/*
 * SABAL/UABAL (bottom/top): absolute-difference-and-accumulate long.
 * There is no byte form (fns[.][0] == NULL); 'uns' selects unsigned,
 * 'sel' selects the T form and is passed to the helper as data.
 */
static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
{
    static gen_helper_gvec_4 * const fns[2][4] = {
        { NULL,                    gen_helper_sve2_sabal_h,
          gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d },
        { NULL,                    gen_helper_sve2_uabal_h,
          gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d },
    };
    return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel);
}

static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, false);
}

static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, true);
}

static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, false);
}

static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, true);
}
b8295dfb
RH
7295
/* ADCLB/ADCLT: add-with-carry long (32- or 64-bit containers only). */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return do_sve2_zzzz_ool(s, a, fns[a->esz & 1], (a->esz & 2) | sel);
}

static bool trans_ADCLB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_adcl(s, a, false);
}

static bool trans_ADCLT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_adcl(s, a, true);
}
a7e3a90e
RH
7318
/*
 * Expand a two-operand + immediate gvec expander for SVE2.
 * a->esz < 0 marks an invalid decode.
 */
static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
        unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
        fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
    }
    return true;
}

/*
 * Accumulating shifts, sharing the expanders used by the AdvSIMD
 * versions: SSRA/USRA (plain) and SRSRA/URSRA (rounding).
 */
static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ssra);
}

static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_usra);
}

static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_srsra);
}

static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ursra);
}
fc12b46a
RH
7352
/* SRI/SLI: shift right/left and insert, via the shared gvec expanders. */
static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sri);
}

static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sli);
}
289a1797
RH
7362
/* Expand a three-vector gvec expander (rd, rn, rm) for SVE2. */
static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}

/* SABA/UABA: absolute difference and accumulate, shared with AdvSIMD. */
static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_saba);
}

static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
}
5ff2838d
RH
7383
/*
 * Common expander for the saturating extract-narrow group below.
 * ops[] is indexed by a->esz; each entry's .vece is the wide (source)
 * element size.  The decoded immediate must be zero for this group.
 */
static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
                                   const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
        !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}
7399
/* Host vector opcodes required by the SQXTN inline expansions. */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTNB: signed saturating extract narrow (bottom).  Clamp n to the
 * signed range of the half-width element, then mask to the low half
 * of each wide element.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7439
/*
 * SQXTNT: signed saturating extract narrow (top).  Clamp n in place,
 * shift the result into the high half of each wide element, then use
 * bitsel to merge with the preserved low halves of d (load_dest).
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7479
/* Host vector opcodes required by the UQXTN inline expansions. */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

/*
 * UQXTNB: unsigned saturating extract narrow (bottom).  A single
 * unsigned min against the half-width mask both saturates and clears
 * the high half of each wide element.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7513
/*
 * UQXTNT: unsigned saturating extract narrow (top).  Saturate n in
 * place, shift into the high half, then merge with the preserved low
 * halves of d via bitsel (load_dest).
 */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7548
/* Host vector opcodes required by the SQXTUN inline expansions. */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTUNB: signed saturating extract unsigned narrow (bottom).
 * Clamp signed n to [0, 2^halfbits - 1]; the umin also clears the
 * high half of each wide element.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7584
/*
 * SQXTUNT: signed saturating extract unsigned narrow (top).  Clamp n
 * in place, shift into the high half, merge with preserved low halves
 * of d via bitsel (load_dest).
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7621
/*
 * Common expander for the shift-right-and-narrow group below.
 * ops[] is indexed by a->esz (the narrow result size); the decoder
 * guarantees 1 <= imm <= wide element size, asserted here.
 */
static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
                               const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
7637
/*
 * SHRNB: shift right and narrow (bottom).  'vece' names the wide
 * (source) element size; shift each wide element right and keep only
 * the low half of each container.
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

/* Per-size .fni8 wrappers binding the wide element size. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

/* Host-vector variant of the same operation. */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnb16_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnb32_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnb64_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7696
/*
 * SHRNT: shift right and narrow (top).  Shift the wide element left
 * by (halfbits - shr) so the narrowed result lands in the high half,
 * then merge with the preserved low halves of d.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/* 64-bit containers: a single shift + deposit into the high 32 bits. */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

/* Host-vector variant: bitsel merges the halves under the mask. */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnt16_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnt32_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnt64_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7761
/*
 * RSHRNB/RSHRNT: rounding shift right and narrow.  No inline
 * expansion; always use the out-of-line helpers.
 */
static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnb_h },
        { .fno = gen_helper_sve2_rshrnb_s },
        { .fno = gen_helper_sve2_rshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnt_h },
        { .fno = gen_helper_sve2_rshrnt_s },
        { .fno = gen_helper_sve2_rshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7781
/*
 * SQSHRUNB: signed saturating shift right unsigned narrow (bottom).
 * Arithmetic shift, clamp to [0, 2^halfbits - 1]; the umin also
 * clears the high half of each wide element.
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7817
/*
 * SQSHRUNT: as SQSHRUNB, but the narrowed result is shifted into the
 * high half and merged with the preserved low halves of d (load_dest).
 */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7859
/* SQRSHRUNB/SQRSHRUNT: rounding forms, out-of-line helpers only. */
static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunb_h },
        { .fno = gen_helper_sve2_sqrshrunb_s },
        { .fno = gen_helper_sve2_sqrshrunb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunt_h },
        { .fno = gen_helper_sve2_sqrshrunt_s },
        { .fno = gen_helper_sve2_sqrshrunt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7879
743bb147
RH
/*
 * SQSHRNB: signed saturating shift right narrow (bottom).
 * Arithmetic shift, clamp to the signed half-width range, then mask
 * to the low half of each wide element.
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7919
/*
 * SQSHRNT: as SQSHRNB, but the narrowed result is shifted into the
 * high half and merged with the preserved low halves of d (load_dest).
 */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7964
/* SQRSHRNB/SQRSHRNT: rounding forms, out-of-line helpers only. */
static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnb_h },
        { .fno = gen_helper_sve2_sqrshrnb_s },
        { .fno = gen_helper_sve2_sqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnt_h },
        { .fno = gen_helper_sve2_sqrshrnt_s },
        { .fno = gen_helper_sve2_sqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7984
c13418da
RH
/*
 * UQSHRNB: unsigned saturating shift right narrow (bottom).
 * Logical shift, then a single umin both saturates and clears the
 * high half of each wide element.
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
8018
/*
 * UQSHRNT: as UQSHRNB, but the narrowed result is shifted into the
 * high half and merged with the preserved low halves of d (load_dest).
 */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
8057
/* UQRSHRNB/UQRSHRNT: rounding forms, out-of-line helpers only. */
static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnb_h },
        { .fno = gen_helper_sve2_uqrshrnb_s },
        { .fno = gen_helper_sve2_uqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnt_h },
        { .fno = gen_helper_sve2_uqrshrnt_s },
        { .fno = gen_helper_sve2_uqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
b87dbeeb 8077
40d5ea50
SL
/*
 * Narrowing high-half add/sub instructions.  The macro instantiates a
 * trans function dispatching on element size; there is no byte form.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)                 \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    return do_sve2_zzz_ool(s, a, fns[a->esz]);                            \
}

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
e0ae6ec3
SL
/* Gate a flag-setting predicated compare-style helper on SVE2. */
static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                               gen_helper_gvec_flags_4 *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_ppzz_flags(s, a, fn);
}

/* MATCH/NMATCH exist only for byte and halfword element sizes. */
#define DO_SVE2_PPZZ_MATCH(NAME, name)                                      \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_flags_4 * const fns[4] = {                       \
        gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h,   \
        NULL,                            NULL                               \
    };                                                                      \
    return do_sve2_ppzz_flags(s, a, fns[a->esz]);                           \
}

DO_SVE2_PPZZ_MATCH(MATCH, match)
DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
8119
7d47ac94
SL
/* HISTCNT exists only for word and doubleword element sizes. */
static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
    };
    if (a->esz < 2) {
        return false;
    }
    return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
}

/* HISTSEG operates on bytes only. */
static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
{
    if (a->esz != 0) {
        return false;
    }
    return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
}
8138
b87dbeeb
SL
/* Gate a predicated two-source FP helper on SVE2. */
static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                            gen_helper_gvec_4_ptr *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzz_fp(s, a, fn);
}

/* SVE2 FP pairwise operations; no byte form. */
#define DO_SVE2_ZPZZ_FP(NAME, name)                                         \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_4_ptr * const fns[4] = {                         \
        NULL,                            gen_helper_sve2_##name##_zpzz_h,   \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d    \
    };                                                                      \
    return do_sve2_zpzz_fp(s, a, fns[a->esz]);                              \
}

DO_SVE2_ZPZZ_FP(FADDP, faddp)
DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
DO_SVE2_ZPZZ_FP(FMINP, fminp)
bfc9307e
RH
8163
8164/*
8165 * SVE Integer Multiply-Add (unpredicated)
8166 */
8167
4f26756b
SL
/*
 * FMMLA: floating-point matrix multiply-accumulate.  The single- and
 * double-precision forms are gated on separate F32MM/F64MM feature
 * bits; the helper runs with the current FPCR float status.
 */
static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    gen_helper_gvec_4_ptr *fn;

    switch (a->esz) {
    case MO_32:
        if (!dc_isar_feature(aa64_sve_f32mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_s;
        break;
    case MO_64:
        if (!dc_isar_feature(aa64_sve_f64mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_d;
        break;
    default:
        return false;
    }

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
8201
bfc9307e
RH
/*
 * SQDMLAL/SQDMLSL (bottom/top/bottom-top): signed saturating doubling
 * multiply add/sub long.  sel1/sel2 pick top (true) vs bottom (false)
 * for the two source operands and are packed into the helper data.
 * No byte form (fns[0] == NULL).
 */
static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
        gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

static bool do_sqdmlsl_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
        gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

static bool trans_SQDMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, false);
}

static bool trans_SQDMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, true, true);
}

static bool trans_SQDMLALBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, true);
}

static bool trans_SQDMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, false);
}

static bool trans_SQDMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, true, true);
}

static bool trans_SQDMLSLBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, true);
}
ab3ddf31
RH
8251
/* SQRDMLAH/SQRDMLSH: saturating rounding doubling multiply acc/sub. */
static bool trans_SQRDMLAH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
        gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}

static bool trans_SQRDMLSH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
        gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}
45a32e80
RH
8269
/*
 * S/U MLAL/MLSL (bottom/top): widening multiply add/sub long.
 * 'sel' selects the T (true) vs B (false) form and is passed to the
 * helper as data; there is no byte form (fns[0] == NULL).
 */
static bool do_smlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_smlal_zzzw_h,
        gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_SMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, false);
}

static bool trans_SMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, true);
}

static bool do_umlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_umlal_zzzw_h,
        gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_UMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, false);
}

static bool trans_UMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, true);
}

static bool do_smlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_smlsl_zzzw_h,
        gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_SMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, false);
}

static bool trans_SMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, true);
}

static bool do_umlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_umlsl_zzzw_h,
        gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_UMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, false);
}

static bool trans_UMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, true);
}
d782d3ca
RH
8345
8346static bool trans_CMLA_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8347{
8348 static gen_helper_gvec_4 * const fns[] = {
8349 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
8350 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
8351 };
8352
8353 if (!dc_isar_feature(aa64_sve2, s)) {
8354 return false;
8355 }
8356 if (sve_access_check(s)) {
8357 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8358 }
8359 return true;
8360}
8361
21068f39
RH
8362static bool trans_CDOT_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8363{
8364 if (!dc_isar_feature(aa64_sve2, s) || a->esz < MO_32) {
8365 return false;
8366 }
8367 if (sve_access_check(s)) {
8368 gen_helper_gvec_4 *fn = (a->esz == MO_32
8369 ? gen_helper_sve2_cdot_zzzz_s
8370 : gen_helper_sve2_cdot_zzzz_d);
8371 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->rot);
8372 }
8373 return true;
8374}
8375
d782d3ca
RH
8376static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_SQRDCMLAH_zzzz *a)
8377{
8378 static gen_helper_gvec_4 * const fns[] = {
8379 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
8380 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
8381 };
8382
8383 if (!dc_isar_feature(aa64_sve2, s)) {
8384 return false;
8385 }
8386 if (sve_access_check(s)) {
8387 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8388 }
8389 return true;
8390}
6a98cb2a
RH
8391
8392static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a)
8393{
8394 if (a->esz != 2 || !dc_isar_feature(aa64_sve_i8mm, s)) {
8395 return false;
8396 }
8397 if (sve_access_check(s)) {
8398 unsigned vsz = vec_full_reg_size(s);
8399 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
8400 vec_full_reg_offset(s, a->rn),
8401 vec_full_reg_offset(s, a->rm),
8402 vec_full_reg_offset(s, a->ra),
8403 vsz, vsz, 0, gen_helper_gvec_usdot_b);
8404 }
8405 return true;
8406}
b2bcd1be
RH
8407
/*
 * AESMC / AESIMC: AES MixColumns and its inverse, operating in place
 * on Zd (note rd is passed as both source and destination).  The
 * decrypt flag from the encoding selects the inverse transform.
 */
static bool trans_AESMC(DisasContext *s, arg_AESMC *a)
{
    if (!dc_isar_feature(aa64_sve2_aes, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zz(s, gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt);
    }
    return true;
}
3cc7a88e
RH
8418
8419static bool do_aese(DisasContext *s, arg_rrr_esz *a, bool decrypt)
8420{
8421 if (!dc_isar_feature(aa64_sve2_aes, s)) {
8422 return false;
8423 }
8424 if (sve_access_check(s)) {
8425 gen_gvec_ool_zzz(s, gen_helper_crypto_aese,
8426 a->rd, a->rn, a->rm, decrypt);
8427 }
8428 return true;
8429}
8430
8431static bool trans_AESE(DisasContext *s, arg_rrr_esz *a)
8432{
8433 return do_aese(s, a, false);
8434}
8435
8436static bool trans_AESD(DisasContext *s, arg_rrr_esz *a)
8437{
8438 return do_aese(s, a, true);
8439}
8440
8441static bool do_sm4(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
8442{
8443 if (!dc_isar_feature(aa64_sve2_sm4, s)) {
8444 return false;
8445 }
8446 if (sve_access_check(s)) {
8447 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
8448 }
8449 return true;
8450}
8451
8452static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a)
8453{
8454 return do_sm4(s, a, gen_helper_crypto_sm4e);
8455}
3358eb3f
RH
8456
8457static bool trans_SM4EKEY(DisasContext *s, arg_rrr_esz *a)
8458{
8459 return do_sm4(s, a, gen_helper_crypto_sm4ekey);
8460}
8461
/*
 * RAX1: rotate-and-xor (SHA3 acceleration); always operates on
 * 64-bit elements, hence the fixed MO_64.
 */
static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2_sha3, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
    }
    return true;
}
5c1b7226
RH
8472
/*
 * Predicated floating-point conversions between precisions.
 * Per the SVE2 naming, *NT forms narrow (e.g. single to half,
 * double to single) and *LT forms widen; the exact element
 * placement (bottom/top) is handled inside the helpers.
 */
static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
}

/* BFCVTNT: narrow single-precision to bfloat16 (FEAT_BF16). */
static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
}

static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
}

static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
}

static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
}
95365277
SL
8512
/*
 * FCVTX / FCVTXNT: double-to-single conversion using round-to-odd
 * (note the explicit float_round_to_odd mode), which avoids double
 * rounding when the result is later rounded again to a narrower type.
 */
static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
}
631be02e
SL
8528
8529static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
8530{
8531 static gen_helper_gvec_3_ptr * const fns[] = {
8532 NULL, gen_helper_flogb_h,
8533 gen_helper_flogb_s, gen_helper_flogb_d
8534 };
8535
8536 if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
8537 return false;
8538 }
8539 if (sve_access_check(s)) {
8540 TCGv_ptr status =
8541 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8542 unsigned vsz = vec_full_reg_size(s);
8543
8544 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
8545 vec_full_reg_offset(s, a->rn),
8546 pred_full_reg_offset(s, a->pg),
8547 status, vsz, vsz, 0, fns[a->esz]);
8548 tcg_temp_free_ptr(status);
8549 }
8550 return true;
8551}
50d102bd
SL
8552
8553static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
8554{
8555 if (!dc_isar_feature(aa64_sve2, s)) {
8556 return false;
8557 }
8558 if (sve_access_check(s)) {
8559 unsigned vsz = vec_full_reg_size(s);
8560 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8561 vec_full_reg_offset(s, a->rn),
8562 vec_full_reg_offset(s, a->rm),
8563 vec_full_reg_offset(s, a->ra),
8564 cpu_env, vsz, vsz, (sel << 1) | sub,
8565 gen_helper_sve2_fmlal_zzzw_s);
8566 }
8567 return true;
8568}
8569
8570static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8571{
8572 return do_FMLAL_zzzw(s, a, false, false);
8573}
8574
8575static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8576{
8577 return do_FMLAL_zzzw(s, a, false, true);
8578}
8579
8580static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8581{
8582 return do_FMLAL_zzzw(s, a, true, false);
8583}
8584
8585static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8586{
8587 return do_FMLAL_zzzw(s, a, true, true);
8588}
8589
8590static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
8591{
8592 if (!dc_isar_feature(aa64_sve2, s)) {
8593 return false;
8594 }
8595 if (sve_access_check(s)) {
8596 unsigned vsz = vec_full_reg_size(s);
8597 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8598 vec_full_reg_offset(s, a->rn),
8599 vec_full_reg_offset(s, a->rm),
8600 vec_full_reg_offset(s, a->ra),
8601 cpu_env, vsz, vsz,
8602 (a->index << 2) | (sel << 1) | sub,
8603 gen_helper_sve2_fmlal_zzxw_s);
8604 }
8605 return true;
8606}
8607
8608static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8609{
8610 return do_FMLAL_zzxw(s, a, false, false);
8611}
8612
8613static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8614{
8615 return do_FMLAL_zzxw(s, a, false, true);
8616}
8617
8618static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8619{
8620 return do_FMLAL_zzxw(s, a, true, false);
8621}
8622
8623static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8624{
8625 return do_FMLAL_zzxw(s, a, true, true);
8626}
2323c5ff
RH
8627
8628static bool do_i8mm_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
8629 gen_helper_gvec_4 *fn, int data)
8630{
8631 if (!dc_isar_feature(aa64_sve_i8mm, s)) {
8632 return false;
8633 }
8634 if (sve_access_check(s)) {
8635 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
8636 }
8637 return true;
8638}
8639
8640static bool trans_SMMLA(DisasContext *s, arg_rrrr_esz *a)
8641{
8642 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_smmla_b, 0);
8643}
8644
8645static bool trans_USMMLA(DisasContext *s, arg_rrrr_esz *a)
8646{
8647 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_usmmla_b, 0);
8648}
8649
8650static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a)
8651{
8652 return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0);
8653}
cb8657f7
RH
8654
/*
 * BFDOT (vectors): bfloat16 dot product into single-precision
 * accumulators (FEAT_BF16).
 */
static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot,
                          a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}

/*
 * BFDOT (indexed): as above, with the element index from the
 * encoding forwarded to the helper via simd_data.
 */
static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx,
                          a->rd, a->rn, a->rm, a->ra, a->index);
    }
    return true;
}

/* BFMMLA: bfloat16 matrix multiply-accumulate (FEAT_BF16). */
static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla,
                          a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
5693887f
RH
8690
8691static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8692{
8693 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8694 return false;
8695 }
8696 if (sve_access_check(s)) {
8697 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8698 unsigned vsz = vec_full_reg_size(s);
8699
8700 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8701 vec_full_reg_offset(s, a->rn),
8702 vec_full_reg_offset(s, a->rm),
8703 vec_full_reg_offset(s, a->ra),
8704 status, vsz, vsz, sel,
8705 gen_helper_gvec_bfmlal);
8706 tcg_temp_free_ptr(status);
8707 }
8708 return true;
8709}
8710
8711static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8712{
8713 return do_BFMLAL_zzzw(s, a, false);
8714}
8715
8716static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8717{
8718 return do_BFMLAL_zzzw(s, a, true);
8719}
458d0ab6
RH
8720
8721static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
8722{
8723 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8724 return false;
8725 }
8726 if (sve_access_check(s)) {
8727 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8728 unsigned vsz = vec_full_reg_size(s);
8729
8730 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8731 vec_full_reg_offset(s, a->rn),
8732 vec_full_reg_offset(s, a->rm),
8733 vec_full_reg_offset(s, a->ra),
8734 status, vsz, vsz, (a->index << 1) | sel,
8735 gen_helper_gvec_bfmlal_idx);
8736 tcg_temp_free_ptr(status);
8737 }
8738 return true;
8739}
8740
8741static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8742{
8743 return do_BFMLAL_zzxw(s, a, false);
8744}
8745
8746static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8747{
8748 return do_BFMLAL_zzxw(s, a, true);
8749}