1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
33#include "trace-tcg.h"
34#include "translate-a64.h"
cc48affe 35#include "fpu/softfloat.h"
38388f7e 36
757f9cff 37
38typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64, uint32_t, uint32_t);
40
41typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42 TCGv_ptr, TCGv_i32);
43typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44 TCGv_ptr, TCGv_ptr, TCGv_i32);
45
c4e7c493 46typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
47typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 49
50/*
51 * Helpers for extracting complex instruction fields.
52 */
53
54/* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
56 */
451e4ffd 57static int tszimm_esz(DisasContext *s, int x)
58{
59 x >>= 3; /* discard imm3 */
60 return 31 - clz32(x);
61}
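
/*
 * Worked example (illustrative, not from the original source): the shift
 * immediates are encoded as tszh:tszl:imm3.  For a 16-bit element shift
 * the tsz field is 001x, so after discarding imm3 the remaining value is
 * 2 or 3 and 31 - clz32(x) yields 1 (MO_16).  A tsz field of 0 gives
 * clz32(0) == 32 and hence -1, which callers treat as unallocated.
 */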
62
451e4ffd 63static int tszimm_shr(DisasContext *s, int x)
ccd841c3 64{
451e4ffd 65 return (16 << tszimm_esz(s, x)) - x;
66}
67
68/* See e.g. LSL (immediate, predicated). */
451e4ffd 69static int tszimm_shl(DisasContext *s, int x)
ccd841c3 70{
451e4ffd 71 return x - (8 << tszimm_esz(s, x));
72}
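
/*
 * Sketch of the ranges (inferred from the arithmetic above): with byte
 * elements (esz == 0) the combined tsz:imm3 value x runs from 8 to 15,
 * so tszimm_shr() produces shifts of 16 - x = 8 down to 1 and
 * tszimm_shl() produces shifts of x - 8 = 0 up to 7, matching the
 * architectural ranges of 1..esize for right shifts and 0..esize-1
 * for left shifts.
 */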
73
451e4ffd 74static inline int plus1(DisasContext *s, int x)
75{
76 return x + 1;
77}
78
f25a2361 79/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 80static inline int expand_imm_sh8s(DisasContext *s, int x)
81{
82 return (int8_t)x << (x & 0x100 ? 8 : 0);
83}
84
451e4ffd 85static inline int expand_imm_sh8u(DisasContext *s, int x)
86{
87 return (uint8_t)x << (x & 0x100 ? 8 : 0);
88}
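
/*
 * Illustrative values: for x = 0x1ab the SH bit is set, so
 * expand_imm_sh8s() returns (int8_t)0xab << 8 = -0x5500 and
 * expand_imm_sh8u() returns 0xab00; with SH clear the low byte is
 * used unshifted.
 */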
89
90/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
91 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
92 */
451e4ffd 93static inline int msz_dtype(DisasContext *s, int msz)
94{
95 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
96 return dtype[msz];
97}
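
/*
 * For example, an LD1W of 32-bit data has msz == 2 and so maps to
 * dtype == 10, which (assuming the usual SVE dtype numbering) is the
 * unsigned word-to-word entry of the contiguous-load table.
 */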
98
99/*
100 * Include the generated decoder.
101 */
102
139c1837 103#include "decode-sve.c.inc"
104
105/*
106 * Implement all of the translator functions referenced by the decoder.
107 */
108
109/* Return the offset into CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
111 */
112static inline int pred_full_reg_offset(DisasContext *s, int regno)
113{
114 return offsetof(CPUARMState, vfp.pregs[regno]);
115}
116
117/* Return the byte size of the whole predicate register, VL / 64. */
118static inline int pred_full_reg_size(DisasContext *s)
119{
120 return s->sve_len >> 3;
121}
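
/*
 * E.g. at a 256-bit vector length s->sve_len is 32 bytes, and each
 * predicate register holds one bit per vector byte: 32 / 8 = 4 bytes.
 */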
122
123/* Round up the size of a register to a size allowed by
124 * the tcg vector infrastructure. Any operation which uses this
125 * size may assume that the bits above pred_full_reg_size are zero,
126 * and must leave them the same way.
127 *
128 * Note that this is not needed for the vector registers as they
129 * are always properly sized for tcg vectors.
130 */
131static int size_for_gvec(int size)
132{
133 if (size <= 8) {
134 return 8;
135 } else {
136 return QEMU_ALIGN_UP(size, 16);
137 }
138}
139
140static int pred_gvec_reg_size(DisasContext *s)
141{
142 return size_for_gvec(pred_full_reg_size(s));
143}
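
/*
 * A sketch of the rounding: a 4-byte predicate (256-bit VL) is padded
 * to the 8-byte minimum the tcg gvec expanders accept, while a 20-byte
 * predicate (1280-bit VL) rounds up to 32 bytes.
 */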
144
145/* Invoke an out-of-line helper on 2 Zregs. */
146static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
147 int rd, int rn, int data)
148{
149 unsigned vsz = vec_full_reg_size(s);
150 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
151 vec_full_reg_offset(s, rn),
152 vsz, vsz, data, fn);
153}
154
155/* Invoke an out-of-line helper on 3 Zregs. */
156static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
157 int rd, int rn, int rm, int data)
158{
159 unsigned vsz = vec_full_reg_size(s);
160 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
161 vec_full_reg_offset(s, rn),
162 vec_full_reg_offset(s, rm),
163 vsz, vsz, data, fn);
164}
165
166/* Invoke an out-of-line helper on 4 Zregs. */
167static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
168 int rd, int rn, int rm, int ra, int data)
169{
170 unsigned vsz = vec_full_reg_size(s);
171 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
172 vec_full_reg_offset(s, rn),
173 vec_full_reg_offset(s, rm),
174 vec_full_reg_offset(s, ra),
175 vsz, vsz, data, fn);
176}
177
178/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
179static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
180 int rd, int rn, int pg, int data)
181{
182 unsigned vsz = vec_full_reg_size(s);
183 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
184 vec_full_reg_offset(s, rn),
185 pred_full_reg_offset(s, pg),
186 vsz, vsz, data, fn);
187}
188
189/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
190static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
191 int rd, int rn, int rm, int pg, int data)
192{
193 unsigned vsz = vec_full_reg_size(s);
194 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
195 vec_full_reg_offset(s, rn),
196 vec_full_reg_offset(s, rm),
197 pred_full_reg_offset(s, pg),
198 vsz, vsz, data, fn);
199}
f7d79c41 200
36cbb7a8 201/* Invoke a vector expander on two Zregs. */
202static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
203 int esz, int rd, int rn)
38388f7e 204{
205 unsigned vsz = vec_full_reg_size(s);
206 gvec_fn(esz, vec_full_reg_offset(s, rd),
207 vec_full_reg_offset(s, rn), vsz, vsz);
208}
209
39eea561 210/* Invoke a vector expander on three Zregs. */
211static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
212 int esz, int rd, int rn, int rm)
38388f7e 213{
214 unsigned vsz = vec_full_reg_size(s);
215 gvec_fn(esz, vec_full_reg_offset(s, rd),
216 vec_full_reg_offset(s, rn),
217 vec_full_reg_offset(s, rm), vsz, vsz);
218}
219
220/* Invoke a vector expander on four Zregs. */
221static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
222 int esz, int rd, int rn, int rm, int ra)
223{
224 unsigned vsz = vec_full_reg_size(s);
225 gvec_fn(esz, vec_full_reg_offset(s, rd),
226 vec_full_reg_offset(s, rn),
227 vec_full_reg_offset(s, rm),
228 vec_full_reg_offset(s, ra), vsz, vsz);
229}
230
231/* Invoke a vector move on two Zregs. */
232static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 233{
234 if (sve_access_check(s)) {
235 gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
236 }
237 return true;
238}
239
240/* Initialize a Zreg with replications of a 64-bit immediate. */
241static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
242{
243 unsigned vsz = vec_full_reg_size(s);
8711e71f 244 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
245}
246
516e246a 247/* Invoke a vector expander on three Pregs. */
248static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
249 int rd, int rn, int rm)
516e246a 250{
251 unsigned psz = pred_gvec_reg_size(s);
252 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
253 pred_full_reg_offset(s, rn),
254 pred_full_reg_offset(s, rm), psz, psz);
255}
256
257/* Invoke a vector move on two Pregs. */
258static bool do_mov_p(DisasContext *s, int rd, int rn)
259{
260 if (sve_access_check(s)) {
261 unsigned psz = pred_gvec_reg_size(s);
262 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
263 pred_full_reg_offset(s, rn), psz, psz);
264 }
265 return true;
266}
267
268/* Set the cpu flags as per a return from an SVE helper. */
269static void do_pred_flags(TCGv_i32 t)
270{
271 tcg_gen_mov_i32(cpu_NF, t);
272 tcg_gen_andi_i32(cpu_ZF, t, 2);
273 tcg_gen_andi_i32(cpu_CF, t, 1);
274 tcg_gen_movi_i32(cpu_VF, 0);
275}
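
/*
 * A note on the packing, as read from this function: the helper result
 * is copied straight into NF (bit 31 supplies the N flag), bit 1 lands
 * in ZF (QEMU treats Z as set when ZF == 0), bit 0 supplies C, and V is
 * always zero, which matches the NZCV result of the PredTest pseudocode.
 */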
276
277/* Subroutines computing the ARM PredTest pseudofunction. */
278static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
279{
280 TCGv_i32 t = tcg_temp_new_i32();
281
282 gen_helper_sve_predtest1(t, d, g);
283 do_pred_flags(t);
284 tcg_temp_free_i32(t);
285}
286
287static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
288{
289 TCGv_ptr dptr = tcg_temp_new_ptr();
290 TCGv_ptr gptr = tcg_temp_new_ptr();
291 TCGv_i32 t;
292
293 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
294 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
295 t = tcg_const_i32(words);
296
297 gen_helper_sve_predtest(t, dptr, gptr, t);
298 tcg_temp_free_ptr(dptr);
299 tcg_temp_free_ptr(gptr);
300
301 do_pred_flags(t);
302 tcg_temp_free_i32(t);
303}
304
305/* For each element size, the bits within a predicate word that are active. */
306const uint64_t pred_esz_masks[4] = {
307 0xffffffffffffffffull, 0x5555555555555555ull,
308 0x1111111111111111ull, 0x0101010101010101ull
309};
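
/*
 * For example, with MO_32 elements only every fourth predicate bit is
 * significant, hence the 0x1111... mask; MO_8 uses every bit and MO_64
 * only every eighth bit.
 */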
310
311/*
312 *** SVE Logical - Unpredicated Group
313 */
314
315static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
316{
317 if (sve_access_check(s)) {
318 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
319 }
320 return true;
321}
322
3a7be554 323static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 324{
28c4da31 325 return do_zzz_fn(s, a, tcg_gen_gvec_and);
326}
327
3a7be554 328static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 329{
28c4da31 330 return do_zzz_fn(s, a, tcg_gen_gvec_or);
331}
332
3a7be554 333static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 334{
28c4da31 335 return do_zzz_fn(s, a, tcg_gen_gvec_xor);
336}
337
3a7be554 338static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
38388f7e 339{
28c4da31 340 return do_zzz_fn(s, a, tcg_gen_gvec_andc);
38388f7e 341}
d1822297 342
343static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
344{
345 if (!dc_isar_feature(aa64_sve2, s)) {
346 return false;
347 }
348 if (sve_access_check(s)) {
349 gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
350 }
351 return true;
352}
353
354static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
355{
356 tcg_gen_xor_i64(d, n, m);
357 tcg_gen_xor_i64(d, d, k);
358}
359
360static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
361 TCGv_vec m, TCGv_vec k)
362{
363 tcg_gen_xor_vec(vece, d, n, m);
364 tcg_gen_xor_vec(vece, d, d, k);
365}
366
367static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
368 uint32_t a, uint32_t oprsz, uint32_t maxsz)
369{
370 static const GVecGen4 op = {
371 .fni8 = gen_eor3_i64,
372 .fniv = gen_eor3_vec,
373 .fno = gen_helper_sve2_eor3,
374 .vece = MO_64,
375 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
376 };
377 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
378}
379
380static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
381{
382 return do_sve2_zzzz_fn(s, a, gen_eor3);
383}
384
385static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
386{
387 tcg_gen_andc_i64(d, m, k);
388 tcg_gen_xor_i64(d, d, n);
389}
390
391static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
392 TCGv_vec m, TCGv_vec k)
393{
394 tcg_gen_andc_vec(vece, d, m, k);
395 tcg_gen_xor_vec(vece, d, d, n);
396}
397
398static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
399 uint32_t a, uint32_t oprsz, uint32_t maxsz)
400{
401 static const GVecGen4 op = {
402 .fni8 = gen_bcax_i64,
403 .fniv = gen_bcax_vec,
404 .fno = gen_helper_sve2_bcax,
405 .vece = MO_64,
406 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
407 };
408 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
409}
410
411static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
412{
413 return do_sve2_zzzz_fn(s, a, gen_bcax);
414}
415
416static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
417 uint32_t a, uint32_t oprsz, uint32_t maxsz)
418{
419 /* BSL differs from the generic bitsel in argument ordering. */
420 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
421}
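
/*
 * In other words (a paraphrase, not the author's wording): tcg's bitsel
 * takes the selector as its first source, computing (n & sel) | (m & ~sel),
 * whereas SVE2 BSL keeps the selector in the Zk (ra) operand, so passing
 * 'a' first yields the architectural (Zdn & Zk) | (Zm & ~Zk).
 */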
422
423static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
424{
425 return do_sve2_zzzz_fn(s, a, gen_bsl);
426}
427
428static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
429{
430 tcg_gen_andc_i64(n, k, n);
431 tcg_gen_andc_i64(m, m, k);
432 tcg_gen_or_i64(d, n, m);
433}
434
435static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
436 TCGv_vec m, TCGv_vec k)
437{
438 if (TCG_TARGET_HAS_bitsel_vec) {
439 tcg_gen_not_vec(vece, n, n);
440 tcg_gen_bitsel_vec(vece, d, k, n, m);
441 } else {
442 tcg_gen_andc_vec(vece, n, k, n);
443 tcg_gen_andc_vec(vece, m, m, k);
444 tcg_gen_or_vec(vece, d, n, m);
445 }
446}
447
448static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
449 uint32_t a, uint32_t oprsz, uint32_t maxsz)
450{
451 static const GVecGen4 op = {
452 .fni8 = gen_bsl1n_i64,
453 .fniv = gen_bsl1n_vec,
454 .fno = gen_helper_sve2_bsl1n,
455 .vece = MO_64,
456 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
457 };
458 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
459}
460
461static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
462{
463 return do_sve2_zzzz_fn(s, a, gen_bsl1n);
464}
465
466static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
467{
468 /*
469 * Z[dn] = (n & k) | (~m & ~k)
470 *       = (n & k) | ~(m | k)
471 */
472 tcg_gen_and_i64(n, n, k);
473 if (TCG_TARGET_HAS_orc_i64) {
474 tcg_gen_or_i64(m, m, k);
475 tcg_gen_orc_i64(d, n, m);
476 } else {
477 tcg_gen_nor_i64(m, m, k);
478 tcg_gen_or_i64(d, n, m);
479 }
480}
481
482static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
483 TCGv_vec m, TCGv_vec k)
484{
485 if (TCG_TARGET_HAS_bitsel_vec) {
486 tcg_gen_not_vec(vece, m, m);
487 tcg_gen_bitsel_vec(vece, d, k, n, m);
488 } else {
489 tcg_gen_and_vec(vece, n, n, k);
490 tcg_gen_or_vec(vece, m, m, k);
491 tcg_gen_orc_vec(vece, d, n, m);
492 }
493}
494
495static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
496 uint32_t a, uint32_t oprsz, uint32_t maxsz)
497{
498 static const GVecGen4 op = {
499 .fni8 = gen_bsl2n_i64,
500 .fniv = gen_bsl2n_vec,
501 .fno = gen_helper_sve2_bsl2n,
502 .vece = MO_64,
503 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
504 };
505 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
506}
507
508static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
509{
510 return do_sve2_zzzz_fn(s, a, gen_bsl2n);
511}
512
513static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
514{
515 tcg_gen_and_i64(n, n, k);
516 tcg_gen_andc_i64(m, m, k);
517 tcg_gen_nor_i64(d, n, m);
518}
519
520static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
521 TCGv_vec m, TCGv_vec k)
522{
523 tcg_gen_bitsel_vec(vece, d, k, n, m);
524 tcg_gen_not_vec(vece, d, d);
525}
526
527static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
528 uint32_t a, uint32_t oprsz, uint32_t maxsz)
529{
530 static const GVecGen4 op = {
531 .fni8 = gen_nbsl_i64,
532 .fniv = gen_nbsl_vec,
533 .fno = gen_helper_sve2_nbsl,
534 .vece = MO_64,
535 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
536 };
537 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
538}
539
540static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
541{
542 return do_sve2_zzzz_fn(s, a, gen_nbsl);
543}
544
fea98f9c
RH
545/*
546 *** SVE Integer Arithmetic - Unpredicated Group
547 */
548
3a7be554 549static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 550{
28c4da31 551 return do_zzz_fn(s, a, tcg_gen_gvec_add);
fea98f9c
RH
552}
553
3a7be554 554static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 555{
28c4da31 556 return do_zzz_fn(s, a, tcg_gen_gvec_sub);
fea98f9c
RH
557}
558
3a7be554 559static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 560{
28c4da31 561 return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
fea98f9c
RH
562}
563
3a7be554 564static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 565{
28c4da31 566 return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
fea98f9c
RH
567}
568
3a7be554 569static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 570{
28c4da31 571 return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
fea98f9c
RH
572}
573
3a7be554 574static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 575{
28c4da31 576 return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
fea98f9c
RH
577}
578
f97cfd59
RH
579/*
580 *** SVE Integer Arithmetic - Binary Predicated Group
581 */
582
583static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
584{
f97cfd59
RH
585 if (fn == NULL) {
586 return false;
587 }
588 if (sve_access_check(s)) {
36cbb7a8 589 gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
f97cfd59
RH
590 }
591 return true;
592}
593
594/* Select active elements from Zn and inactive elements from Zm,
595 * storing the result in Zd.
596 */
597static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
598{
599 static gen_helper_gvec_4 * const fns[4] = {
600 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
601 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
602 };
36cbb7a8 603 gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
604}
605
f97cfd59 606#define DO_ZPZZ(NAME, name) \
3a7be554 607static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
f97cfd59
RH
608{ \
609 static gen_helper_gvec_4 * const fns[4] = { \
610 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
611 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
612 }; \
613 return do_zpzz_ool(s, a, fns[a->esz]); \
614}
615
616DO_ZPZZ(AND, and)
617DO_ZPZZ(EOR, eor)
618DO_ZPZZ(ORR, orr)
619DO_ZPZZ(BIC, bic)
620
621DO_ZPZZ(ADD, add)
622DO_ZPZZ(SUB, sub)
623
624DO_ZPZZ(SMAX, smax)
625DO_ZPZZ(UMAX, umax)
626DO_ZPZZ(SMIN, smin)
627DO_ZPZZ(UMIN, umin)
628DO_ZPZZ(SABD, sabd)
629DO_ZPZZ(UABD, uabd)
630
631DO_ZPZZ(MUL, mul)
632DO_ZPZZ(SMULH, smulh)
633DO_ZPZZ(UMULH, umulh)
634
27721dbb
RH
635DO_ZPZZ(ASR, asr)
636DO_ZPZZ(LSR, lsr)
637DO_ZPZZ(LSL, lsl)
638
3a7be554 639static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
f97cfd59
RH
640{
641 static gen_helper_gvec_4 * const fns[4] = {
642 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
643 };
644 return do_zpzz_ool(s, a, fns[a->esz]);
645}
646
3a7be554 647static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
f97cfd59
RH
648{
649 static gen_helper_gvec_4 * const fns[4] = {
650 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
651 };
652 return do_zpzz_ool(s, a, fns[a->esz]);
653}
654
3a7be554 655static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
a2103582
RH
656{
657 if (sve_access_check(s)) {
658 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
659 }
660 return true;
661}
d3fe4a29 662
f97cfd59
RH
663#undef DO_ZPZZ
664
afac6d04
RH
665/*
666 *** SVE Integer Arithmetic - Unary Predicated Group
667 */
668
669static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
670{
671 if (fn == NULL) {
672 return false;
673 }
674 if (sve_access_check(s)) {
96a461f7 675 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
afac6d04
RH
676 }
677 return true;
678}
679
680#define DO_ZPZ(NAME, name) \
3a7be554 681static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
afac6d04
RH
682{ \
683 static gen_helper_gvec_3 * const fns[4] = { \
684 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
685 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
686 }; \
687 return do_zpz_ool(s, a, fns[a->esz]); \
688}
689
690DO_ZPZ(CLS, cls)
691DO_ZPZ(CLZ, clz)
692DO_ZPZ(CNT_zpz, cnt_zpz)
693DO_ZPZ(CNOT, cnot)
694DO_ZPZ(NOT_zpz, not_zpz)
695DO_ZPZ(ABS, abs)
696DO_ZPZ(NEG, neg)
697
3a7be554 698static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
699{
700 static gen_helper_gvec_3 * const fns[4] = {
701 NULL,
702 gen_helper_sve_fabs_h,
703 gen_helper_sve_fabs_s,
704 gen_helper_sve_fabs_d
705 };
706 return do_zpz_ool(s, a, fns[a->esz]);
707}
708
3a7be554 709static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
710{
711 static gen_helper_gvec_3 * const fns[4] = {
712 NULL,
713 gen_helper_sve_fneg_h,
714 gen_helper_sve_fneg_s,
715 gen_helper_sve_fneg_d
716 };
717 return do_zpz_ool(s, a, fns[a->esz]);
718}
719
3a7be554 720static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
721{
722 static gen_helper_gvec_3 * const fns[4] = {
723 NULL,
724 gen_helper_sve_sxtb_h,
725 gen_helper_sve_sxtb_s,
726 gen_helper_sve_sxtb_d
727 };
728 return do_zpz_ool(s, a, fns[a->esz]);
729}
730
3a7be554 731static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
732{
733 static gen_helper_gvec_3 * const fns[4] = {
734 NULL,
735 gen_helper_sve_uxtb_h,
736 gen_helper_sve_uxtb_s,
737 gen_helper_sve_uxtb_d
738 };
739 return do_zpz_ool(s, a, fns[a->esz]);
740}
741
3a7be554 742static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
743{
744 static gen_helper_gvec_3 * const fns[4] = {
745 NULL, NULL,
746 gen_helper_sve_sxth_s,
747 gen_helper_sve_sxth_d
748 };
749 return do_zpz_ool(s, a, fns[a->esz]);
750}
751
3a7be554 752static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
753{
754 static gen_helper_gvec_3 * const fns[4] = {
755 NULL, NULL,
756 gen_helper_sve_uxth_s,
757 gen_helper_sve_uxth_d
758 };
759 return do_zpz_ool(s, a, fns[a->esz]);
760}
761
3a7be554 762static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
763{
764 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
765}
766
3a7be554 767static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
768{
769 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
770}
771
772#undef DO_ZPZ
773
047cec97
RH
774/*
775 *** SVE Integer Reduction Group
776 */
777
778typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
779static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
780 gen_helper_gvec_reduc *fn)
781{
782 unsigned vsz = vec_full_reg_size(s);
783 TCGv_ptr t_zn, t_pg;
784 TCGv_i32 desc;
785 TCGv_i64 temp;
786
787 if (fn == NULL) {
788 return false;
789 }
790 if (!sve_access_check(s)) {
791 return true;
792 }
793
794 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
795 temp = tcg_temp_new_i64();
796 t_zn = tcg_temp_new_ptr();
797 t_pg = tcg_temp_new_ptr();
798
799 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
800 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
801 fn(temp, t_zn, t_pg, desc);
802 tcg_temp_free_ptr(t_zn);
803 tcg_temp_free_ptr(t_pg);
804 tcg_temp_free_i32(desc);
805
806 write_fp_dreg(s, a->rd, temp);
807 tcg_temp_free_i64(temp);
808 return true;
809}
810
811#define DO_VPZ(NAME, name) \
3a7be554 812static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
047cec97
RH
813{ \
814 static gen_helper_gvec_reduc * const fns[4] = { \
815 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
816 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
817 }; \
818 return do_vpz_ool(s, a, fns[a->esz]); \
819}
820
821DO_VPZ(ORV, orv)
822DO_VPZ(ANDV, andv)
823DO_VPZ(EORV, eorv)
824
825DO_VPZ(UADDV, uaddv)
826DO_VPZ(SMAXV, smaxv)
827DO_VPZ(UMAXV, umaxv)
828DO_VPZ(SMINV, sminv)
829DO_VPZ(UMINV, uminv)
830
3a7be554 831static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
047cec97
RH
832{
833 static gen_helper_gvec_reduc * const fns[4] = {
834 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
835 gen_helper_sve_saddv_s, NULL
836 };
837 return do_vpz_ool(s, a, fns[a->esz]);
838}
839
840#undef DO_VPZ
841
ccd841c3
RH
842/*
843 *** SVE Shift by Immediate - Predicated Group
844 */
845
60245996
RH
846/*
847 * Copy Zn into Zd, storing zeros into inactive elements.
848 * If invert, store zeros into the active elements.
ccd841c3 849 */
60245996
RH
850static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
851 int esz, bool invert)
ccd841c3 852{
60245996
RH
853 static gen_helper_gvec_3 * const fns[4] = {
854 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
855 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
ccd841c3 856 };
60245996 857
ccd841c3 858 if (sve_access_check(s)) {
96a461f7 859 gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
ccd841c3
RH
860 }
861 return true;
862}
863
864static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
865 gen_helper_gvec_3 *fn)
866{
867 if (sve_access_check(s)) {
96a461f7 868 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
ccd841c3
RH
869 }
870 return true;
871}
872
3a7be554 873static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
874{
875 static gen_helper_gvec_3 * const fns[4] = {
876 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
877 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
878 };
879 if (a->esz < 0) {
880 /* Invalid tsz encoding -- see tszimm_esz. */
881 return false;
882 }
883 /* Shift by element size is architecturally valid. For
884 arithmetic right-shift, it's the same as by one less. */
885 a->imm = MIN(a->imm, (8 << a->esz) - 1);
886 return do_zpzi_ool(s, a, fns[a->esz]);
887}
888
3a7be554 889static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
890{
891 static gen_helper_gvec_3 * const fns[4] = {
892 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
893 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
894 };
895 if (a->esz < 0) {
896 return false;
897 }
898 /* Shift by element size is architecturally valid.
899 For logical shifts, it is a zeroing operation. */
900 if (a->imm >= (8 << a->esz)) {
60245996 901 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
902 } else {
903 return do_zpzi_ool(s, a, fns[a->esz]);
904 }
905}
906
3a7be554 907static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
908{
909 static gen_helper_gvec_3 * const fns[4] = {
910 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
911 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
912 };
913 if (a->esz < 0) {
914 return false;
915 }
916 /* Shift by element size is architecturally valid.
917 For logical shifts, it is a zeroing operation. */
918 if (a->imm >= (8 << a->esz)) {
60245996 919 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
920 } else {
921 return do_zpzi_ool(s, a, fns[a->esz]);
922 }
923}
924
3a7be554 925static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
926{
927 static gen_helper_gvec_3 * const fns[4] = {
928 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
929 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
930 };
931 if (a->esz < 0) {
932 return false;
933 }
934 /* Shift by element size is architecturally valid. For arithmetic
935 right shift for division, it is a zeroing operation. */
936 if (a->imm >= (8 << a->esz)) {
60245996 937 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
938 } else {
939 return do_zpzi_ool(s, a, fns[a->esz]);
940 }
941}
942
fe7f8dfb
RH
943/*
944 *** SVE Bitwise Shift - Predicated Group
945 */
946
947#define DO_ZPZW(NAME, name) \
3a7be554 948static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
fe7f8dfb
RH
949{ \
950 static gen_helper_gvec_4 * const fns[3] = { \
951 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
952 gen_helper_sve_##name##_zpzw_s, \
953 }; \
954 if (a->esz < 0 || a->esz >= 3) { \
955 return false; \
956 } \
957 return do_zpzz_ool(s, a, fns[a->esz]); \
958}
959
960DO_ZPZW(ASR, asr)
961DO_ZPZW(LSR, lsr)
962DO_ZPZW(LSL, lsl)
963
964#undef DO_ZPZW
965
d9d78dcc
RH
966/*
967 *** SVE Bitwise Shift - Unpredicated Group
968 */
969
970static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
971 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
972 int64_t, uint32_t, uint32_t))
973{
974 if (a->esz < 0) {
975 /* Invalid tsz encoding -- see tszimm_esz. */
976 return false;
977 }
978 if (sve_access_check(s)) {
979 unsigned vsz = vec_full_reg_size(s);
980 /* Shift by element size is architecturally valid. For
981 arithmetic right-shift, it's the same as by one less.
982 Otherwise it is a zeroing operation. */
983 if (a->imm >= 8 << a->esz) {
984 if (asr) {
985 a->imm = (8 << a->esz) - 1;
986 } else {
987 do_dupi_z(s, a->rd, 0);
988 return true;
989 }
990 }
991 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
992 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
993 }
994 return true;
995}
996
3a7be554 997static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
998{
999 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
1000}
1001
3a7be554 1002static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
1003{
1004 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
1005}
1006
3a7be554 1007static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
1008{
1009 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
1010}
1011
1012static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
1013{
1014 if (fn == NULL) {
1015 return false;
1016 }
1017 if (sve_access_check(s)) {
e645d1a1 1018 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
d9d78dcc
RH
1019 }
1020 return true;
1021}
1022
1023#define DO_ZZW(NAME, name) \
3a7be554 1024static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
d9d78dcc
RH
1025{ \
1026 static gen_helper_gvec_3 * const fns[4] = { \
1027 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
1028 gen_helper_sve_##name##_zzw_s, NULL \
1029 }; \
1030 return do_zzw_ool(s, a, fns[a->esz]); \
1031}
1032
1033DO_ZZW(ASR, asr)
1034DO_ZZW(LSR, lsr)
1035DO_ZZW(LSL, lsl)
1036
1037#undef DO_ZZW
1038
96a36e4a
RH
1039/*
1040 *** SVE Integer Multiply-Add Group
1041 */
1042
1043static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1044 gen_helper_gvec_5 *fn)
1045{
1046 if (sve_access_check(s)) {
1047 unsigned vsz = vec_full_reg_size(s);
1048 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1049 vec_full_reg_offset(s, a->ra),
1050 vec_full_reg_offset(s, a->rn),
1051 vec_full_reg_offset(s, a->rm),
1052 pred_full_reg_offset(s, a->pg),
1053 vsz, vsz, 0, fn);
1054 }
1055 return true;
1056}
1057
1058#define DO_ZPZZZ(NAME, name) \
3a7be554 1059static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
96a36e4a
RH
1060{ \
1061 static gen_helper_gvec_5 * const fns[4] = { \
1062 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
1063 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
1064 }; \
1065 return do_zpzzz_ool(s, a, fns[a->esz]); \
1066}
1067
1068DO_ZPZZZ(MLA, mla)
1069DO_ZPZZZ(MLS, mls)
1070
1071#undef DO_ZPZZZ
1072
9a56c9c3
RH
1073/*
1074 *** SVE Index Generation Group
1075 */
1076
1077static void do_index(DisasContext *s, int esz, int rd,
1078 TCGv_i64 start, TCGv_i64 incr)
1079{
1080 unsigned vsz = vec_full_reg_size(s);
1081 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1082 TCGv_ptr t_zd = tcg_temp_new_ptr();
1083
1084 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1085 if (esz == 3) {
1086 gen_helper_sve_index_d(t_zd, start, incr, desc);
1087 } else {
1088 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
1089 static index_fn * const fns[3] = {
1090 gen_helper_sve_index_b,
1091 gen_helper_sve_index_h,
1092 gen_helper_sve_index_s,
1093 };
1094 TCGv_i32 s32 = tcg_temp_new_i32();
1095 TCGv_i32 i32 = tcg_temp_new_i32();
1096
1097 tcg_gen_extrl_i64_i32(s32, start);
1098 tcg_gen_extrl_i64_i32(i32, incr);
1099 fns[esz](t_zd, s32, i32, desc);
1100
1101 tcg_temp_free_i32(s32);
1102 tcg_temp_free_i32(i32);
1103 }
1104 tcg_temp_free_ptr(t_zd);
1105 tcg_temp_free_i32(desc);
1106}
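
/*
 * For reference (paraphrasing the architecture, not this file): INDEX
 * fills Zd with start, start + incr, start + 2 * incr, ... truncated to
 * the element size, so e.g. INDEX Z0.B, #0, #1 writes 0, 1, 2, ... into
 * successive byte lanes.
 */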
1107
3a7be554 1108static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
9a56c9c3
RH
1109{
1110 if (sve_access_check(s)) {
1111 TCGv_i64 start = tcg_const_i64(a->imm1);
1112 TCGv_i64 incr = tcg_const_i64(a->imm2);
1113 do_index(s, a->esz, a->rd, start, incr);
1114 tcg_temp_free_i64(start);
1115 tcg_temp_free_i64(incr);
1116 }
1117 return true;
1118}
1119
3a7be554 1120static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
9a56c9c3
RH
1121{
1122 if (sve_access_check(s)) {
1123 TCGv_i64 start = tcg_const_i64(a->imm);
1124 TCGv_i64 incr = cpu_reg(s, a->rm);
1125 do_index(s, a->esz, a->rd, start, incr);
1126 tcg_temp_free_i64(start);
1127 }
1128 return true;
1129}
1130
3a7be554 1131static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
9a56c9c3
RH
1132{
1133 if (sve_access_check(s)) {
1134 TCGv_i64 start = cpu_reg(s, a->rn);
1135 TCGv_i64 incr = tcg_const_i64(a->imm);
1136 do_index(s, a->esz, a->rd, start, incr);
1137 tcg_temp_free_i64(incr);
1138 }
1139 return true;
1140}
1141
3a7be554 1142static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
9a56c9c3
RH
1143{
1144 if (sve_access_check(s)) {
1145 TCGv_i64 start = cpu_reg(s, a->rn);
1146 TCGv_i64 incr = cpu_reg(s, a->rm);
1147 do_index(s, a->esz, a->rd, start, incr);
1148 }
1149 return true;
1150}
1151
96f922cc
RH
1152/*
1153 *** SVE Stack Allocation Group
1154 */
1155
3a7be554 1156static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1157{
5de56742
AC
1158 if (sve_access_check(s)) {
1159 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1160 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1161 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1162 }
96f922cc
RH
1163 return true;
1164}
1165
3a7be554 1166static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1167{
5de56742
AC
1168 if (sve_access_check(s)) {
1169 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1170 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1171 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1172 }
96f922cc
RH
1173 return true;
1174}
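
/*
 * E.g. at a 256-bit vector length, ADDVL X0, X1, #2 adds 2 * 32 = 64
 * bytes and ADDPL X0, X1, #2 adds 2 * 4 = 8 bytes, which is exactly the
 * vec_full_reg_size() / pred_full_reg_size() scaling used above.
 */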
1175
3a7be554 1176static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1177{
5de56742
AC
1178 if (sve_access_check(s)) {
1179 TCGv_i64 reg = cpu_reg(s, a->rd);
1180 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1181 }
96f922cc
RH
1182 return true;
1183}
1184
4b242d9c
RH
1185/*
1186 *** SVE Compute Vector Address Group
1187 */
1188
1189static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
1190{
1191 if (sve_access_check(s)) {
e645d1a1 1192 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
4b242d9c
RH
1193 }
1194 return true;
1195}
1196
3a7be554 1197static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1198{
1199 return do_adr(s, a, gen_helper_sve_adr_p32);
1200}
1201
3a7be554 1202static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1203{
1204 return do_adr(s, a, gen_helper_sve_adr_p64);
1205}
1206
3a7be554 1207static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1208{
1209 return do_adr(s, a, gen_helper_sve_adr_s32);
1210}
1211
3a7be554 1212static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
1213{
1214 return do_adr(s, a, gen_helper_sve_adr_u32);
1215}
1216
0762cd42
RH
1217/*
1218 *** SVE Integer Misc - Unpredicated Group
1219 */
1220
3a7be554 1221static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
0762cd42
RH
1222{
1223 static gen_helper_gvec_2 * const fns[4] = {
1224 NULL,
1225 gen_helper_sve_fexpa_h,
1226 gen_helper_sve_fexpa_s,
1227 gen_helper_sve_fexpa_d,
1228 };
1229 if (a->esz == 0) {
1230 return false;
1231 }
1232 if (sve_access_check(s)) {
40e32e5a 1233 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
0762cd42
RH
1234 }
1235 return true;
1236}
1237
3a7be554 1238static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
a1f233f2
RH
1239{
1240 static gen_helper_gvec_3 * const fns[4] = {
1241 NULL,
1242 gen_helper_sve_ftssel_h,
1243 gen_helper_sve_ftssel_s,
1244 gen_helper_sve_ftssel_d,
1245 };
1246 if (a->esz == 0) {
1247 return false;
1248 }
1249 if (sve_access_check(s)) {
e645d1a1 1250 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
a1f233f2
RH
1251 }
1252 return true;
1253}
1254
516e246a
RH
1255/*
1256 *** SVE Predicate Logical Operations Group
1257 */
1258
1259static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1260 const GVecGen4 *gvec_op)
1261{
1262 if (!sve_access_check(s)) {
1263 return true;
1264 }
1265
1266 unsigned psz = pred_gvec_reg_size(s);
1267 int dofs = pred_full_reg_offset(s, a->rd);
1268 int nofs = pred_full_reg_offset(s, a->rn);
1269 int mofs = pred_full_reg_offset(s, a->rm);
1270 int gofs = pred_full_reg_offset(s, a->pg);
1271
dd81a8d7
RH
1272 if (!a->s) {
1273 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1274 return true;
1275 }
1276
516e246a
RH
1277 if (psz == 8) {
1278 /* Do the operation and the flags generation in temps. */
1279 TCGv_i64 pd = tcg_temp_new_i64();
1280 TCGv_i64 pn = tcg_temp_new_i64();
1281 TCGv_i64 pm = tcg_temp_new_i64();
1282 TCGv_i64 pg = tcg_temp_new_i64();
1283
1284 tcg_gen_ld_i64(pn, cpu_env, nofs);
1285 tcg_gen_ld_i64(pm, cpu_env, mofs);
1286 tcg_gen_ld_i64(pg, cpu_env, gofs);
1287
1288 gvec_op->fni8(pd, pn, pm, pg);
1289 tcg_gen_st_i64(pd, cpu_env, dofs);
1290
1291 do_predtest1(pd, pg);
1292
1293 tcg_temp_free_i64(pd);
1294 tcg_temp_free_i64(pn);
1295 tcg_temp_free_i64(pm);
1296 tcg_temp_free_i64(pg);
1297 } else {
1298 /* The operation and flags generation is large. The computation
1299 * of the flags depends on the original contents of the guarding
1300 * predicate. If the destination overwrites the guarding predicate,
1301 * then the easiest way to get this right is to save a copy.
1302 */
1303 int tofs = gofs;
1304 if (a->rd == a->pg) {
1305 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1306 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1307 }
1308
1309 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1310 do_predtest(s, dofs, tofs, psz / 8);
1311 }
1312 return true;
1313}
1314
1315static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1316{
1317 tcg_gen_and_i64(pd, pn, pm);
1318 tcg_gen_and_i64(pd, pd, pg);
1319}
1320
1321static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1322 TCGv_vec pm, TCGv_vec pg)
1323{
1324 tcg_gen_and_vec(vece, pd, pn, pm);
1325 tcg_gen_and_vec(vece, pd, pd, pg);
1326}
1327
3a7be554 1328static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1329{
1330 static const GVecGen4 op = {
1331 .fni8 = gen_and_pg_i64,
1332 .fniv = gen_and_pg_vec,
1333 .fno = gen_helper_sve_and_pppp,
1334 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1335 };
dd81a8d7
RH
1336
1337 if (!a->s) {
1338 if (!sve_access_check(s)) {
1339 return true;
1340 }
1341 if (a->rn == a->rm) {
1342 if (a->pg == a->rn) {
1343 do_mov_p(s, a->rd, a->rn);
1344 } else {
1345 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1346 }
1347 return true;
1348 } else if (a->pg == a->rn || a->pg == a->rm) {
1349 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1350 return true;
516e246a 1351 }
516e246a 1352 }
dd81a8d7 1353 return do_pppp_flags(s, a, &op);
516e246a
RH
1354}
1355
1356static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1357{
1358 tcg_gen_andc_i64(pd, pn, pm);
1359 tcg_gen_and_i64(pd, pd, pg);
1360}
1361
1362static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1363 TCGv_vec pm, TCGv_vec pg)
1364{
1365 tcg_gen_andc_vec(vece, pd, pn, pm);
1366 tcg_gen_and_vec(vece, pd, pd, pg);
1367}
1368
3a7be554 1369static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1370{
1371 static const GVecGen4 op = {
1372 .fni8 = gen_bic_pg_i64,
1373 .fniv = gen_bic_pg_vec,
1374 .fno = gen_helper_sve_bic_pppp,
1375 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1376 };
dd81a8d7
RH
1377
1378 if (!a->s && a->pg == a->rn) {
1379 if (sve_access_check(s)) {
1380 gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1381 }
1382 return true;
516e246a 1383 }
dd81a8d7 1384 return do_pppp_flags(s, a, &op);
516e246a
RH
1385}
1386
1387static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1388{
1389 tcg_gen_xor_i64(pd, pn, pm);
1390 tcg_gen_and_i64(pd, pd, pg);
1391}
1392
1393static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1394 TCGv_vec pm, TCGv_vec pg)
1395{
1396 tcg_gen_xor_vec(vece, pd, pn, pm);
1397 tcg_gen_and_vec(vece, pd, pd, pg);
1398}
1399
3a7be554 1400static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1401{
1402 static const GVecGen4 op = {
1403 .fni8 = gen_eor_pg_i64,
1404 .fniv = gen_eor_pg_vec,
1405 .fno = gen_helper_sve_eor_pppp,
1406 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1407 };
dd81a8d7 1408 return do_pppp_flags(s, a, &op);
516e246a
RH
1409}
1410
3a7be554 1411static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1412{
516e246a
RH
1413 if (a->s) {
1414 return false;
516e246a 1415 }
d4bc6232
RH
1416 if (sve_access_check(s)) {
1417 unsigned psz = pred_gvec_reg_size(s);
1418 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1419 pred_full_reg_offset(s, a->pg),
1420 pred_full_reg_offset(s, a->rn),
1421 pred_full_reg_offset(s, a->rm), psz, psz);
1422 }
1423 return true;
516e246a
RH
1424}
1425
1426static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1427{
1428 tcg_gen_or_i64(pd, pn, pm);
1429 tcg_gen_and_i64(pd, pd, pg);
1430}
1431
1432static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1433 TCGv_vec pm, TCGv_vec pg)
1434{
1435 tcg_gen_or_vec(vece, pd, pn, pm);
1436 tcg_gen_and_vec(vece, pd, pd, pg);
1437}
1438
3a7be554 1439static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1440{
1441 static const GVecGen4 op = {
1442 .fni8 = gen_orr_pg_i64,
1443 .fniv = gen_orr_pg_vec,
1444 .fno = gen_helper_sve_orr_pppp,
1445 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1446 };
dd81a8d7
RH
1447
1448 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
516e246a 1449 return do_mov_p(s, a->rd, a->rn);
516e246a 1450 }
dd81a8d7 1451 return do_pppp_flags(s, a, &op);
516e246a
RH
1452}
1453
1454static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1455{
1456 tcg_gen_orc_i64(pd, pn, pm);
1457 tcg_gen_and_i64(pd, pd, pg);
1458}
1459
1460static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1461 TCGv_vec pm, TCGv_vec pg)
1462{
1463 tcg_gen_orc_vec(vece, pd, pn, pm);
1464 tcg_gen_and_vec(vece, pd, pd, pg);
1465}
1466
3a7be554 1467static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1468{
1469 static const GVecGen4 op = {
1470 .fni8 = gen_orn_pg_i64,
1471 .fniv = gen_orn_pg_vec,
1472 .fno = gen_helper_sve_orn_pppp,
1473 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1474 };
dd81a8d7 1475 return do_pppp_flags(s, a, &op);
516e246a
RH
1476}
1477
1478static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1479{
1480 tcg_gen_or_i64(pd, pn, pm);
1481 tcg_gen_andc_i64(pd, pg, pd);
1482}
1483
1484static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1485 TCGv_vec pm, TCGv_vec pg)
1486{
1487 tcg_gen_or_vec(vece, pd, pn, pm);
1488 tcg_gen_andc_vec(vece, pd, pg, pd);
1489}
1490
3a7be554 1491static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1492{
1493 static const GVecGen4 op = {
1494 .fni8 = gen_nor_pg_i64,
1495 .fniv = gen_nor_pg_vec,
1496 .fno = gen_helper_sve_nor_pppp,
1497 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1498 };
dd81a8d7 1499 return do_pppp_flags(s, a, &op);
516e246a
RH
1500}
1501
1502static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1503{
1504 tcg_gen_and_i64(pd, pn, pm);
1505 tcg_gen_andc_i64(pd, pg, pd);
1506}
1507
1508static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1509 TCGv_vec pm, TCGv_vec pg)
1510{
1511 tcg_gen_and_vec(vece, pd, pn, pm);
1512 tcg_gen_andc_vec(vece, pd, pg, pd);
1513}
1514
3a7be554 1515static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1516{
1517 static const GVecGen4 op = {
1518 .fni8 = gen_nand_pg_i64,
1519 .fniv = gen_nand_pg_vec,
1520 .fno = gen_helper_sve_nand_pppp,
1521 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1522 };
dd81a8d7 1523 return do_pppp_flags(s, a, &op);
516e246a
RH
1524}
1525
9e18d7a6
RH
1526/*
1527 *** SVE Predicate Misc Group
1528 */
1529
3a7be554 1530static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
9e18d7a6
RH
1531{
1532 if (sve_access_check(s)) {
1533 int nofs = pred_full_reg_offset(s, a->rn);
1534 int gofs = pred_full_reg_offset(s, a->pg);
1535 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1536
1537 if (words == 1) {
1538 TCGv_i64 pn = tcg_temp_new_i64();
1539 TCGv_i64 pg = tcg_temp_new_i64();
1540
1541 tcg_gen_ld_i64(pn, cpu_env, nofs);
1542 tcg_gen_ld_i64(pg, cpu_env, gofs);
1543 do_predtest1(pn, pg);
1544
1545 tcg_temp_free_i64(pn);
1546 tcg_temp_free_i64(pg);
1547 } else {
1548 do_predtest(s, nofs, gofs, words);
1549 }
1550 }
1551 return true;
1552}
1553
028e2a7b
RH
1554/* See the ARM pseudocode DecodePredCount. */
1555static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1556{
1557 unsigned elements = fullsz >> esz;
1558 unsigned bound;
1559
1560 switch (pattern) {
1561 case 0x0: /* POW2 */
1562 return pow2floor(elements);
1563 case 0x1: /* VL1 */
1564 case 0x2: /* VL2 */
1565 case 0x3: /* VL3 */
1566 case 0x4: /* VL4 */
1567 case 0x5: /* VL5 */
1568 case 0x6: /* VL6 */
1569 case 0x7: /* VL7 */
1570 case 0x8: /* VL8 */
1571 bound = pattern;
1572 break;
1573 case 0x9: /* VL16 */
1574 case 0xa: /* VL32 */
1575 case 0xb: /* VL64 */
1576 case 0xc: /* VL128 */
1577 case 0xd: /* VL256 */
1578 bound = 16 << (pattern - 9);
1579 break;
1580 case 0x1d: /* MUL4 */
1581 return elements - elements % 4;
1582 case 0x1e: /* MUL3 */
1583 return elements - elements % 3;
1584 case 0x1f: /* ALL */
1585 return elements;
1586 default: /* #uimm5 */
1587 return 0;
1588 }
1589 return elements >= bound ? bound : 0;
1590}
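
/*
 * Example (following the pseudocode): with a 512-bit vector and 32-bit
 * elements there are 16 elements, so POW2 and ALL both give 16, VL8
 * gives 8, MUL3 gives 15, and VL32 gives 0 because its bound of 32
 * exceeds the element count.
 */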
1591
1592/* This handles all of the predicate initialization instructions,
1593 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1594 * so that decode_pred_count returns 0. For SETFFR, we will have
1595 * set RD == 16 == FFR.
1596 */
1597static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1598{
1599 if (!sve_access_check(s)) {
1600 return true;
1601 }
1602
1603 unsigned fullsz = vec_full_reg_size(s);
1604 unsigned ofs = pred_full_reg_offset(s, rd);
1605 unsigned numelem, setsz, i;
1606 uint64_t word, lastword;
1607 TCGv_i64 t;
1608
1609 numelem = decode_pred_count(fullsz, pat, esz);
1610
1611 /* Determine what we must store into each bit, and how many. */
1612 if (numelem == 0) {
1613 lastword = word = 0;
1614 setsz = fullsz;
1615 } else {
1616 setsz = numelem << esz;
1617 lastword = word = pred_esz_masks[esz];
1618 if (setsz % 64) {
973558a3 1619 lastword &= MAKE_64BIT_MASK(0, setsz % 64);
028e2a7b
RH
1620 }
1621 }
1622
1623 t = tcg_temp_new_i64();
1624 if (fullsz <= 64) {
1625 tcg_gen_movi_i64(t, lastword);
1626 tcg_gen_st_i64(t, cpu_env, ofs);
1627 goto done;
1628 }
1629
1630 if (word == lastword) {
1631 unsigned maxsz = size_for_gvec(fullsz / 8);
1632 unsigned oprsz = size_for_gvec(setsz / 8);
1633
1634 if (oprsz * 8 == setsz) {
8711e71f 1635 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
028e2a7b
RH
1636 goto done;
1637 }
028e2a7b
RH
1638 }
1639
1640 setsz /= 8;
1641 fullsz /= 8;
1642
1643 tcg_gen_movi_i64(t, word);
973558a3 1644 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
028e2a7b
RH
1645 tcg_gen_st_i64(t, cpu_env, ofs + i);
1646 }
1647 if (lastword != word) {
1648 tcg_gen_movi_i64(t, lastword);
1649 tcg_gen_st_i64(t, cpu_env, ofs + i);
1650 i += 8;
1651 }
1652 if (i < fullsz) {
1653 tcg_gen_movi_i64(t, 0);
1654 for (; i < fullsz; i += 8) {
1655 tcg_gen_st_i64(t, cpu_env, ofs + i);
1656 }
1657 }
1658
1659 done:
1660 tcg_temp_free_i64(t);
1661
1662 /* PTRUES */
1663 if (setflag) {
1664 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1665 tcg_gen_movi_i32(cpu_CF, word == 0);
1666 tcg_gen_movi_i32(cpu_VF, 0);
1667 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1668 }
1669 return true;
1670}
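
/*
 * A concrete illustration (not from the original comments): PTRUE Pd.S
 * with a 256-bit vector sets one predicate bit per 4-byte element, so
 * the 32 predicate bits become 0x11111111; the PTRUES form additionally
 * sets NZCV from the stored word as above.
 */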
1671
3a7be554 1672static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
028e2a7b
RH
1673{
1674 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1675}
1676
3a7be554 1677static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
028e2a7b
RH
1678{
1679 /* Note pat == 31 is #all, to set all elements. */
1680 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1681}
1682
3a7be554 1683static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
028e2a7b
RH
1684{
1685 /* Note pat == 32 is #unimp, to set no elements. */
1686 return do_predset(s, 0, a->rd, 32, false);
1687}
1688
3a7be554 1689static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
028e2a7b
RH
1690{
1691 /* The path through do_pppp_flags is complicated enough to want to avoid
1692 * duplication. Frob the arguments into the form of a predicated AND.
1693 */
1694 arg_rprr_s alt_a = {
1695 .rd = a->rd, .pg = a->pg, .s = a->s,
1696 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1697 };
3a7be554 1698 return trans_AND_pppp(s, &alt_a);
028e2a7b
RH
1699}
1700
3a7be554 1701static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
028e2a7b
RH
1702{
1703 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1704}
1705
3a7be554 1706static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
028e2a7b
RH
1707{
1708 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1709}
1710
1711static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1712 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1713 TCGv_ptr, TCGv_i32))
1714{
1715 if (!sve_access_check(s)) {
1716 return true;
1717 }
1718
1719 TCGv_ptr t_pd = tcg_temp_new_ptr();
1720 TCGv_ptr t_pg = tcg_temp_new_ptr();
1721 TCGv_i32 t;
86300b5d 1722 unsigned desc = 0;
028e2a7b 1723
86300b5d
RH
1724 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
1725 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
028e2a7b
RH
1726
1727 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1728 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1729 t = tcg_const_i32(desc);
1730
1731 gen_fn(t, t_pd, t_pg, t);
1732 tcg_temp_free_ptr(t_pd);
1733 tcg_temp_free_ptr(t_pg);
1734
1735 do_pred_flags(t);
1736 tcg_temp_free_i32(t);
1737 return true;
1738}
1739
3a7be554 1740static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
028e2a7b
RH
1741{
1742 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1743}
1744
3a7be554 1745static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
028e2a7b
RH
1746{
1747 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1748}
1749
1750/*
1751 *** SVE Element Count Group
1752 */
1753
1754/* Perform an inline saturating addition of a 32-bit value within
1755 * a 64-bit register. The second operand is known to be positive,
1756 * which halves the comparisons we must perform to bound the result.
1757 */
1758static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1759{
1760 int64_t ibound;
1761 TCGv_i64 bound;
1762 TCGCond cond;
1763
1764 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1765 if (u) {
1766 tcg_gen_ext32u_i64(reg, reg);
1767 } else {
1768 tcg_gen_ext32s_i64(reg, reg);
1769 }
1770 if (d) {
1771 tcg_gen_sub_i64(reg, reg, val);
1772 ibound = (u ? 0 : INT32_MIN);
1773 cond = TCG_COND_LT;
1774 } else {
1775 tcg_gen_add_i64(reg, reg, val);
1776 ibound = (u ? UINT32_MAX : INT32_MAX);
1777 cond = TCG_COND_GT;
1778 }
1779 bound = tcg_const_i64(ibound);
1780 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1781 tcg_temp_free_i64(bound);
1782}
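
/*
 * Illustrative bounds: an unsigned decrement clamps at 0 and a signed
 * increment at INT32_MAX; e.g. a 32-bit unsigned decrement of a register
 * holding 3 by an element count of 8 saturates to 0 rather than wrapping.
 */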
1783
1784/* Similarly with 64-bit values. */
1785static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1786{
1787 TCGv_i64 t0 = tcg_temp_new_i64();
1788 TCGv_i64 t1 = tcg_temp_new_i64();
1789 TCGv_i64 t2;
1790
1791 if (u) {
1792 if (d) {
1793 tcg_gen_sub_i64(t0, reg, val);
1794 tcg_gen_movi_i64(t1, 0);
1795 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1796 } else {
1797 tcg_gen_add_i64(t0, reg, val);
1798 tcg_gen_movi_i64(t1, -1);
1799 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1800 }
1801 } else {
1802 if (d) {
1803 /* Detect signed overflow for subtraction. */
1804 tcg_gen_xor_i64(t0, reg, val);
1805 tcg_gen_sub_i64(t1, reg, val);
7a31e0c6 1806 tcg_gen_xor_i64(reg, reg, t1);
24e82e68
RH
1807 tcg_gen_and_i64(t0, t0, reg);
1808
1809 /* Bound the result. */
1810 tcg_gen_movi_i64(reg, INT64_MIN);
1811 t2 = tcg_const_i64(0);
1812 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1813 } else {
1814 /* Detect signed overflow for addition. */
1815 tcg_gen_xor_i64(t0, reg, val);
1816 tcg_gen_add_i64(reg, reg, val);
1817 tcg_gen_xor_i64(t1, reg, val);
1818 tcg_gen_andc_i64(t0, t1, t0);
1819
1820 /* Bound the result. */
1821 tcg_gen_movi_i64(t1, INT64_MAX);
1822 t2 = tcg_const_i64(0);
1823 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1824 }
1825 tcg_temp_free_i64(t2);
1826 }
1827 tcg_temp_free_i64(t0);
1828 tcg_temp_free_i64(t1);
1829}
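
/*
 * The signed cases use the usual two's-complement identity (noted here
 * for the reader, not in the original): addition overflows iff the
 * operands have the same sign and the result's sign differs, i.e.
 * (~(reg ^ val) & (result ^ val)) < 0; subtraction uses the mirrored
 * test in which the operand signs must differ.
 */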
1830
1831/* Similarly with a vector and a scalar operand. */
1832static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1833 TCGv_i64 val, bool u, bool d)
1834{
1835 unsigned vsz = vec_full_reg_size(s);
1836 TCGv_ptr dptr, nptr;
1837 TCGv_i32 t32, desc;
1838 TCGv_i64 t64;
1839
1840 dptr = tcg_temp_new_ptr();
1841 nptr = tcg_temp_new_ptr();
1842 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1843 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1844 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1845
1846 switch (esz) {
1847 case MO_8:
1848 t32 = tcg_temp_new_i32();
1849 tcg_gen_extrl_i64_i32(t32, val);
1850 if (d) {
1851 tcg_gen_neg_i32(t32, t32);
1852 }
1853 if (u) {
1854 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1855 } else {
1856 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1857 }
1858 tcg_temp_free_i32(t32);
1859 break;
1860
1861 case MO_16:
1862 t32 = tcg_temp_new_i32();
1863 tcg_gen_extrl_i64_i32(t32, val);
1864 if (d) {
1865 tcg_gen_neg_i32(t32, t32);
1866 }
1867 if (u) {
1868 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1869 } else {
1870 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1871 }
1872 tcg_temp_free_i32(t32);
1873 break;
1874
1875 case MO_32:
1876 t64 = tcg_temp_new_i64();
1877 if (d) {
1878 tcg_gen_neg_i64(t64, val);
1879 } else {
1880 tcg_gen_mov_i64(t64, val);
1881 }
1882 if (u) {
1883 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1884 } else {
1885 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1886 }
1887 tcg_temp_free_i64(t64);
1888 break;
1889
1890 case MO_64:
1891 if (u) {
1892 if (d) {
1893 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1894 } else {
1895 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1896 }
1897 } else if (d) {
1898 t64 = tcg_temp_new_i64();
1899 tcg_gen_neg_i64(t64, val);
1900 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1901 tcg_temp_free_i64(t64);
1902 } else {
1903 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1904 }
1905 break;
1906
1907 default:
1908 g_assert_not_reached();
1909 }
1910
1911 tcg_temp_free_ptr(dptr);
1912 tcg_temp_free_ptr(nptr);
1913 tcg_temp_free_i32(desc);
1914}
1915
3a7be554 1916static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
1917{
1918 if (sve_access_check(s)) {
1919 unsigned fullsz = vec_full_reg_size(s);
1920 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1921 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1922 }
1923 return true;
1924}
1925
3a7be554 1926static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1927{
1928 if (sve_access_check(s)) {
1929 unsigned fullsz = vec_full_reg_size(s);
1930 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1931 int inc = numelem * a->imm * (a->d ? -1 : 1);
1932 TCGv_i64 reg = cpu_reg(s, a->rd);
1933
1934 tcg_gen_addi_i64(reg, reg, inc);
1935 }
1936 return true;
1937}
1938
3a7be554 1939static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1940{
1941 if (!sve_access_check(s)) {
1942 return true;
1943 }
1944
1945 unsigned fullsz = vec_full_reg_size(s);
1946 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1947 int inc = numelem * a->imm;
1948 TCGv_i64 reg = cpu_reg(s, a->rd);
1949
1950 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1951 if (inc == 0) {
1952 if (a->u) {
1953 tcg_gen_ext32u_i64(reg, reg);
1954 } else {
1955 tcg_gen_ext32s_i64(reg, reg);
1956 }
1957 } else {
1958 TCGv_i64 t = tcg_const_i64(inc);
1959 do_sat_addsub_32(reg, t, a->u, a->d);
1960 tcg_temp_free_i64(t);
1961 }
1962 return true;
1963}
1964
3a7be554 1965static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1966{
1967 if (!sve_access_check(s)) {
1968 return true;
1969 }
1970
1971 unsigned fullsz = vec_full_reg_size(s);
1972 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1973 int inc = numelem * a->imm;
1974 TCGv_i64 reg = cpu_reg(s, a->rd);
1975
1976 if (inc != 0) {
1977 TCGv_i64 t = tcg_const_i64(inc);
1978 do_sat_addsub_64(reg, t, a->u, a->d);
1979 tcg_temp_free_i64(t);
1980 }
1981 return true;
1982}
1983
3a7be554 1984static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
1985{
1986 if (a->esz == 0) {
1987 return false;
1988 }
1989
1990 unsigned fullsz = vec_full_reg_size(s);
1991 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1992 int inc = numelem * a->imm;
1993
1994 if (inc != 0) {
1995 if (sve_access_check(s)) {
1996 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1997 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1998 vec_full_reg_offset(s, a->rn),
1999 t, fullsz, fullsz);
2000 tcg_temp_free_i64(t);
2001 }
2002 } else {
2003 do_mov_z(s, a->rd, a->rn);
2004 }
2005 return true;
2006}
2007
3a7be554 2008static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2009{
2010 if (a->esz == 0) {
2011 return false;
2012 }
2013
2014 unsigned fullsz = vec_full_reg_size(s);
2015 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2016 int inc = numelem * a->imm;
2017
2018 if (inc != 0) {
2019 if (sve_access_check(s)) {
2020 TCGv_i64 t = tcg_const_i64(inc);
2021 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
2022 tcg_temp_free_i64(t);
2023 }
2024 } else {
2025 do_mov_z(s, a->rd, a->rn);
2026 }
2027 return true;
2028}
2029
e1fa1164
RH
2030/*
2031 *** SVE Bitwise Immediate Group
2032 */
2033
2034static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2035{
2036 uint64_t imm;
2037 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2038 extract32(a->dbm, 0, 6),
2039 extract32(a->dbm, 6, 6))) {
2040 return false;
2041 }
2042 if (sve_access_check(s)) {
2043 unsigned vsz = vec_full_reg_size(s);
2044 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
2045 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
2046 }
2047 return true;
2048}
2049
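/*
 * Editorial note (illustration, not upstream text): the dbm field is the
 * usual AArch64 bitmask-immediate encoding N:immr:imms, expanded by
 * logic_imm_decode_wmask().  For example N = 1, immr = 0, imms = 0b000111
 * selects a 64-bit element containing eight consecutive ones with no
 * rotation, i.e. 0x00000000000000ff, so "AND Z0.D, Z0.D, #0xff" reaches
 * trans_AND_zzi() below with that expanded constant.
 */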
3a7be554 2050static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2051{
2052 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
2053}
2054
3a7be554 2055static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2056{
2057 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
2058}
2059
3a7be554 2060static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
2061{
2062 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
2063}
2064
3a7be554 2065static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2066{
2067 uint64_t imm;
2068 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2069 extract32(a->dbm, 0, 6),
2070 extract32(a->dbm, 6, 6))) {
2071 return false;
2072 }
2073 if (sve_access_check(s)) {
2074 do_dupi_z(s, a->rd, imm);
2075 }
2076 return true;
2077}
2078
f25a2361
RH
2079/*
2080 *** SVE Integer Wide Immediate - Predicated Group
2081 */
2082
2083/* Implement all merging copies. This is used for CPY (immediate),
2084 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2085 */
2086static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2087 TCGv_i64 val)
2088{
2089 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2090 static gen_cpy * const fns[4] = {
2091 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2092 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2093 };
2094 unsigned vsz = vec_full_reg_size(s);
2095 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2096 TCGv_ptr t_zd = tcg_temp_new_ptr();
2097 TCGv_ptr t_zn = tcg_temp_new_ptr();
2098 TCGv_ptr t_pg = tcg_temp_new_ptr();
2099
2100 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2101 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2102 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2103
2104 fns[esz](t_zd, t_zn, t_pg, val, desc);
2105
2106 tcg_temp_free_ptr(t_zd);
2107 tcg_temp_free_ptr(t_zn);
2108 tcg_temp_free_ptr(t_pg);
2109 tcg_temp_free_i32(desc);
2110}
2111
3a7be554 2112static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361
RH
2113{
2114 if (a->esz == 0) {
2115 return false;
2116 }
2117 if (sve_access_check(s)) {
2118 /* Decode the VFP immediate. */
2119 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
2120 TCGv_i64 t_imm = tcg_const_i64(imm);
2121 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
2122 tcg_temp_free_i64(t_imm);
2123 }
2124 return true;
2125}
2126
3a7be554 2127static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 2128{
3a7be554 2129 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
2130 return false;
2131 }
2132 if (sve_access_check(s)) {
2133 TCGv_i64 t_imm = tcg_const_i64(a->imm);
2134 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
2135 tcg_temp_free_i64(t_imm);
2136 }
2137 return true;
2138}
2139
3a7be554 2140static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
f25a2361
RH
2141{
2142 static gen_helper_gvec_2i * const fns[4] = {
2143 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2144 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2145 };
2146
3a7be554 2147 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
2148 return false;
2149 }
2150 if (sve_access_check(s)) {
2151 unsigned vsz = vec_full_reg_size(s);
2152 TCGv_i64 t_imm = tcg_const_i64(a->imm);
2153 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2154 pred_full_reg_offset(s, a->pg),
2155 t_imm, vsz, vsz, 0, fns[a->esz]);
2156 tcg_temp_free_i64(t_imm);
2157 }
2158 return true;
2159}
2160
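/*
 * Editorial note (illustration, not upstream text): the
 * extract32(s->insn, 13, 1) tests in the two functions above read the
 * "sh" bit of the wide-immediate encodings; a byte-sized operation whose
 * immediate is shifted left by 8 is an unallocated encoding, hence the
 * early "return false" when esz == 0.
 */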
b94f8f60
RH
2161/*
2162 *** SVE Permute Extract Group
2163 */
2164
3a7be554 2165static bool trans_EXT(DisasContext *s, arg_EXT *a)
b94f8f60
RH
2166{
2167 if (!sve_access_check(s)) {
2168 return true;
2169 }
2170
2171 unsigned vsz = vec_full_reg_size(s);
2172 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
2173 unsigned n_siz = vsz - n_ofs;
2174 unsigned d = vec_full_reg_offset(s, a->rd);
2175 unsigned n = vec_full_reg_offset(s, a->rn);
2176 unsigned m = vec_full_reg_offset(s, a->rm);
2177
2178 /* Use host vector move insns if we have appropriate sizes
2179 * and no unfortunate overlap.
2180 */
2181 if (m != d
2182 && n_ofs == size_for_gvec(n_ofs)
2183 && n_siz == size_for_gvec(n_siz)
2184 && (d != n || n_siz <= n_ofs)) {
2185 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2186 if (n_ofs != 0) {
2187 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2188 }
2189 } else {
2190 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2191 }
2192 return true;
2193}
2194
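/*
 * Editorial note (illustration, not upstream text): with a 128-bit vector
 * (vsz = 16) and imm = 8, both n_ofs = 8 and n_siz = 8 are sizes that
 * size_for_gvec() accepts, so EXT is done with two in-line gvec moves
 * (provided the registers do not overlap awkwardly).  With imm = 3 the
 * 13-byte remainder is not a valid gvec size and the code falls back to
 * the out-of-line gen_helper_sve_ext() path.
 */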
30562ab7
RH
2195/*
2196 *** SVE Permute - Unpredicated Group
2197 */
2198
3a7be554 2199static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2200{
2201 if (sve_access_check(s)) {
2202 unsigned vsz = vec_full_reg_size(s);
2203 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2204 vsz, vsz, cpu_reg_sp(s, a->rn));
2205 }
2206 return true;
2207}
2208
3a7be554 2209static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
30562ab7
RH
2210{
2211 if ((a->imm & 0x1f) == 0) {
2212 return false;
2213 }
2214 if (sve_access_check(s)) {
2215 unsigned vsz = vec_full_reg_size(s);
2216 unsigned dofs = vec_full_reg_offset(s, a->rd);
2217 unsigned esz, index;
2218
2219 esz = ctz32(a->imm);
2220 index = a->imm >> (esz + 1);
2221
2222 if ((index << esz) < vsz) {
2223 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2224 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2225 } else {
7e17d50e
RH
2226 /*
2227 * While dup_mem handles 128-bit elements, dup_imm does not.
2228 * Thankfully element size doesn't matter for splatting zero.
2229 */
2230 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
30562ab7
RH
2231 }
2232 }
2233 return true;
2234}
2235
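/*
 * Editorial note (illustration, not upstream text): the combined imm
 * field above encodes the element size in its lowest set bit and the
 * index in the bits above it.  E.g. imm = 0b10100 gives
 * esz = ctz32(imm) = 2 (MO_32) and index = imm >> 3 = 2, i.e.
 * "DUP Z0.S, Z1.S[2]"; with a 128-bit vector the check (2 << 2) < 16
 * passes, so the element is in range and dup_mem is used.
 */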
2236static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2237{
2238 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2239 static gen_insr * const fns[4] = {
2240 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2241 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2242 };
2243 unsigned vsz = vec_full_reg_size(s);
2244 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2245 TCGv_ptr t_zd = tcg_temp_new_ptr();
2246 TCGv_ptr t_zn = tcg_temp_new_ptr();
2247
2248 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2249 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2250
2251 fns[a->esz](t_zd, t_zn, val, desc);
2252
2253 tcg_temp_free_ptr(t_zd);
2254 tcg_temp_free_ptr(t_zn);
2255 tcg_temp_free_i32(desc);
2256}
2257
3a7be554 2258static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2259{
2260 if (sve_access_check(s)) {
2261 TCGv_i64 t = tcg_temp_new_i64();
2262 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2263 do_insr_i64(s, a, t);
2264 tcg_temp_free_i64(t);
2265 }
2266 return true;
2267}
2268
3a7be554 2269static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2270{
2271 if (sve_access_check(s)) {
2272 do_insr_i64(s, a, cpu_reg(s, a->rm));
2273 }
2274 return true;
2275}
2276
3a7be554 2277static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
30562ab7
RH
2278{
2279 static gen_helper_gvec_2 * const fns[4] = {
2280 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2281 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2282 };
2283
2284 if (sve_access_check(s)) {
40e32e5a 2285 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
30562ab7
RH
2286 }
2287 return true;
2288}
2289
3a7be554 2290static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2291{
2292 static gen_helper_gvec_3 * const fns[4] = {
2293 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2294 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2295 };
2296
2297 if (sve_access_check(s)) {
e645d1a1 2298 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
30562ab7
RH
2299 }
2300 return true;
2301}
2302
3a7be554 2303static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2304{
2305 static gen_helper_gvec_2 * const fns[4][2] = {
2306 { NULL, NULL },
2307 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2308 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2309 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2310 };
2311
2312 if (a->esz == 0) {
2313 return false;
2314 }
2315 if (sve_access_check(s)) {
2316 unsigned vsz = vec_full_reg_size(s);
2317 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2318 vec_full_reg_offset(s, a->rn)
2319 + (a->h ? vsz / 2 : 0),
2320 vsz, vsz, 0, fns[a->esz][a->u]);
2321 }
2322 return true;
2323}
2324
d731d8cb
RH
2325/*
2326 *** SVE Permute - Predicates Group
2327 */
2328
2329static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2330 gen_helper_gvec_3 *fn)
2331{
2332 if (!sve_access_check(s)) {
2333 return true;
2334 }
2335
2336 unsigned vsz = pred_full_reg_size(s);
2337
d731d8cb
RH
2338 TCGv_ptr t_d = tcg_temp_new_ptr();
2339 TCGv_ptr t_n = tcg_temp_new_ptr();
2340 TCGv_ptr t_m = tcg_temp_new_ptr();
2341 TCGv_i32 t_desc;
f9b0fcce 2342 uint32_t desc = 0;
d731d8cb 2343
f9b0fcce
RH
2344 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2345 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2346 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2347
2348 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2349 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2350 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2351 t_desc = tcg_const_i32(desc);
2352
2353 fn(t_d, t_n, t_m, t_desc);
2354
2355 tcg_temp_free_ptr(t_d);
2356 tcg_temp_free_ptr(t_n);
2357 tcg_temp_free_ptr(t_m);
2358 tcg_temp_free_i32(t_desc);
2359 return true;
2360}
2361
2362static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2363 gen_helper_gvec_2 *fn)
2364{
2365 if (!sve_access_check(s)) {
2366 return true;
2367 }
2368
2369 unsigned vsz = pred_full_reg_size(s);
2370 TCGv_ptr t_d = tcg_temp_new_ptr();
2371 TCGv_ptr t_n = tcg_temp_new_ptr();
2372 TCGv_i32 t_desc;
70acaafe 2373 uint32_t desc = 0;
d731d8cb
RH
2374
2375 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2376 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2377
70acaafe
RH
2378 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2379 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2380 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2381 t_desc = tcg_const_i32(desc);
2382
2383 fn(t_d, t_n, t_desc);
2384
2385 tcg_temp_free_i32(t_desc);
2386 tcg_temp_free_ptr(t_d);
2387 tcg_temp_free_ptr(t_n);
2388 return true;
2389}
2390
3a7be554 2391static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2392{
2393 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2394}
2395
3a7be554 2396static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2397{
2398 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2399}
2400
3a7be554 2401static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2402{
2403 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2404}
2405
3a7be554 2406static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2407{
2408 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2409}
2410
3a7be554 2411static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2412{
2413 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2414}
2415
3a7be554 2416static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2417{
2418 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2419}
2420
3a7be554 2421static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
d731d8cb
RH
2422{
2423 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2424}
2425
3a7be554 2426static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
d731d8cb
RH
2427{
2428 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2429}
2430
3a7be554 2431static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
d731d8cb
RH
2432{
2433 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2434}
2435
234b48e9
RH
2436/*
2437 *** SVE Permute - Interleaving Group
2438 */
2439
2440static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2441{
2442 static gen_helper_gvec_3 * const fns[4] = {
2443 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2444 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2445 };
2446
2447 if (sve_access_check(s)) {
2448 unsigned vsz = vec_full_reg_size(s);
2449 unsigned high_ofs = high ? vsz / 2 : 0;
2450 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2451 vec_full_reg_offset(s, a->rn) + high_ofs,
2452 vec_full_reg_offset(s, a->rm) + high_ofs,
2453 vsz, vsz, 0, fns[a->esz]);
2454 }
2455 return true;
2456}
2457
2458static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2459 gen_helper_gvec_3 *fn)
2460{
2461 if (sve_access_check(s)) {
e645d1a1 2462 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
234b48e9
RH
2463 }
2464 return true;
2465}
2466
3a7be554 2467static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2468{
2469 return do_zip(s, a, false);
2470}
2471
3a7be554 2472static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2473{
2474 return do_zip(s, a, true);
2475}
2476
2477static gen_helper_gvec_3 * const uzp_fns[4] = {
2478 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2479 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2480};
2481
3a7be554 2482static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2483{
2484 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2485}
2486
3a7be554 2487static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2488{
2489 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2490}
2491
2492static gen_helper_gvec_3 * const trn_fns[4] = {
2493 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2494 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2495};
2496
3a7be554 2497static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2498{
2499 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2500}
2501
3a7be554 2502static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2503{
2504 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2505}
2506
3ca879ae
RH
2507/*
2508 *** SVE Permute Vector - Predicated Group
2509 */
2510
3a7be554 2511static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
3ca879ae
RH
2512{
2513 static gen_helper_gvec_3 * const fns[4] = {
2514 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2515 };
2516 return do_zpz_ool(s, a, fns[a->esz]);
2517}
2518
ef23cb72
RH
2519/* Call the helper that computes the ARM LastActiveElement pseudocode
2520 * function, scaled by the element size. This includes the not found
2521 * indication; e.g. not found for esz=3 is -8.
2522 */
2523static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2524{
2525 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2526 * round up, as we do elsewhere, because we need the exact size.
2527 */
2528 TCGv_ptr t_p = tcg_temp_new_ptr();
2529 TCGv_i32 t_desc;
2acbfbe4 2530 unsigned desc = 0;
ef23cb72 2531
2acbfbe4
RH
2532 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2533 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
ef23cb72
RH
2534
2535 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2536 t_desc = tcg_const_i32(desc);
2537
2538 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2539
2540 tcg_temp_free_i32(t_desc);
2541 tcg_temp_free_ptr(t_p);
2542}
2543
2544/* Increment LAST to the offset of the next element in the vector,
2545 * wrapping around to 0.
2546 */
2547static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2548{
2549 unsigned vsz = vec_full_reg_size(s);
2550
2551 tcg_gen_addi_i32(last, last, 1 << esz);
2552 if (is_power_of_2(vsz)) {
2553 tcg_gen_andi_i32(last, last, vsz - 1);
2554 } else {
2555 TCGv_i32 max = tcg_const_i32(vsz);
2556 TCGv_i32 zero = tcg_const_i32(0);
2557 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2558 tcg_temp_free_i32(max);
2559 tcg_temp_free_i32(zero);
2560 }
2561}
2562
2563/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2564static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2565{
2566 unsigned vsz = vec_full_reg_size(s);
2567
2568 if (is_power_of_2(vsz)) {
2569 tcg_gen_andi_i32(last, last, vsz - 1);
2570 } else {
2571 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2572 TCGv_i32 zero = tcg_const_i32(0);
2573 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2574 tcg_temp_free_i32(max);
2575 tcg_temp_free_i32(zero);
2576 }
2577}
2578
2579/* Load an unsigned element of ESZ from BASE+OFS. */
2580static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2581{
2582 TCGv_i64 r = tcg_temp_new_i64();
2583
2584 switch (esz) {
2585 case 0:
2586 tcg_gen_ld8u_i64(r, base, ofs);
2587 break;
2588 case 1:
2589 tcg_gen_ld16u_i64(r, base, ofs);
2590 break;
2591 case 2:
2592 tcg_gen_ld32u_i64(r, base, ofs);
2593 break;
2594 case 3:
2595 tcg_gen_ld_i64(r, base, ofs);
2596 break;
2597 default:
2598 g_assert_not_reached();
2599 }
2600 return r;
2601}
2602
2603/* Load an unsigned element of ESZ from RM[LAST]. */
2604static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2605 int rm, int esz)
2606{
2607 TCGv_ptr p = tcg_temp_new_ptr();
2608 TCGv_i64 r;
2609
2610 /* Convert offset into vector into offset into ENV.
2611 * The final adjustment for the vector register base
2612 * is added via constant offset to the load.
2613 */
2614#ifdef HOST_WORDS_BIGENDIAN
2615 /* Adjust for element ordering. See vec_reg_offset. */
2616 if (esz < 3) {
2617 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2618 }
2619#endif
2620 tcg_gen_ext_i32_ptr(p, last);
2621 tcg_gen_add_ptr(p, p, cpu_env);
2622
2623 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2624 tcg_temp_free_ptr(p);
2625
2626 return r;
2627}
2628
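/*
 * Editorial note (illustration, not upstream text): on a big-endian host
 * the XOR above re-orders the byte offset within each 64-bit chunk to
 * match vec_reg_offset().  For bytes (esz = 0) the adjustment is
 * last ^= 7, mapping in-chunk offsets 0..7 onto host offsets 7..0; for
 * 32-bit elements it is last ^= 4, swapping the two words of the chunk.
 */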
2629/* Compute CLAST for a Zreg. */
2630static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2631{
2632 TCGv_i32 last;
2633 TCGLabel *over;
2634 TCGv_i64 ele;
2635 unsigned vsz, esz = a->esz;
2636
2637 if (!sve_access_check(s)) {
2638 return true;
2639 }
2640
2641 last = tcg_temp_local_new_i32();
2642 over = gen_new_label();
2643
2644 find_last_active(s, last, esz, a->pg);
2645
2646 /* There is of course no movcond for a 2048-bit vector,
2647 * so we must branch over the actual store.
2648 */
2649 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2650
2651 if (!before) {
2652 incr_last_active(s, last, esz);
2653 }
2654
2655 ele = load_last_active(s, last, a->rm, esz);
2656 tcg_temp_free_i32(last);
2657
2658 vsz = vec_full_reg_size(s);
2659 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2660 tcg_temp_free_i64(ele);
2661
2662 /* If this insn used MOVPRFX, we may need a second move. */
2663 if (a->rd != a->rn) {
2664 TCGLabel *done = gen_new_label();
2665 tcg_gen_br(done);
2666
2667 gen_set_label(over);
2668 do_mov_z(s, a->rd, a->rn);
2669
2670 gen_set_label(done);
2671 } else {
2672 gen_set_label(over);
2673 }
2674 return true;
2675}
2676
3a7be554 2677static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2678{
2679 return do_clast_vector(s, a, false);
2680}
2681
3a7be554 2682static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2683{
2684 return do_clast_vector(s, a, true);
2685}
2686
2687/* Compute CLAST for a scalar. */
2688static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2689 bool before, TCGv_i64 reg_val)
2690{
2691 TCGv_i32 last = tcg_temp_new_i32();
2692 TCGv_i64 ele, cmp, zero;
2693
2694 find_last_active(s, last, esz, pg);
2695
2696 /* Extend the original value of last prior to incrementing. */
2697 cmp = tcg_temp_new_i64();
2698 tcg_gen_ext_i32_i64(cmp, last);
2699
2700 if (!before) {
2701 incr_last_active(s, last, esz);
2702 }
2703
2704 /* The conceit here is that while last < 0 indicates not found, after
2705 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2706 * from which we can load garbage. We then discard the garbage with
2707 * a conditional move.
2708 */
2709 ele = load_last_active(s, last, rm, esz);
2710 tcg_temp_free_i32(last);
2711
2712 zero = tcg_const_i64(0);
2713 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2714
2715 tcg_temp_free_i64(zero);
2716 tcg_temp_free_i64(cmp);
2717 tcg_temp_free_i64(ele);
2718}
2719
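/*
 * Editorial note (illustration, not upstream text): as implemented above,
 * CLASTB extracts the last active element and CLASTA the element after
 * it (incr_last_active() wraps past the end back to offset 0); when no
 * element is active, the GE movcond keeps the previous destination value
 * instead of the garbage that was loaded.
 */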
2720/* Compute CLAST for a Vreg. */
2721static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2722{
2723 if (sve_access_check(s)) {
2724 int esz = a->esz;
2725 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2726 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2727
2728 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2729 write_fp_dreg(s, a->rd, reg);
2730 tcg_temp_free_i64(reg);
2731 }
2732 return true;
2733}
2734
3a7be554 2735static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2736{
2737 return do_clast_fp(s, a, false);
2738}
2739
3a7be554 2740static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2741{
2742 return do_clast_fp(s, a, true);
2743}
2744
2745/* Compute CLAST for a Xreg. */
2746static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2747{
2748 TCGv_i64 reg;
2749
2750 if (!sve_access_check(s)) {
2751 return true;
2752 }
2753
2754 reg = cpu_reg(s, a->rd);
2755 switch (a->esz) {
2756 case 0:
2757 tcg_gen_ext8u_i64(reg, reg);
2758 break;
2759 case 1:
2760 tcg_gen_ext16u_i64(reg, reg);
2761 break;
2762 case 2:
2763 tcg_gen_ext32u_i64(reg, reg);
2764 break;
2765 case 3:
2766 break;
2767 default:
2768 g_assert_not_reached();
2769 }
2770
2771 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2772 return true;
2773}
2774
3a7be554 2775static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2776{
2777 return do_clast_general(s, a, false);
2778}
2779
3a7be554 2780static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2781{
2782 return do_clast_general(s, a, true);
2783}
2784
2785/* Compute LAST for a scalar. */
2786static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2787 int pg, int rm, bool before)
2788{
2789 TCGv_i32 last = tcg_temp_new_i32();
2790 TCGv_i64 ret;
2791
2792 find_last_active(s, last, esz, pg);
2793 if (before) {
2794 wrap_last_active(s, last, esz);
2795 } else {
2796 incr_last_active(s, last, esz);
2797 }
2798
2799 ret = load_last_active(s, last, rm, esz);
2800 tcg_temp_free_i32(last);
2801 return ret;
2802}
2803
2804/* Compute LAST for a Vreg. */
2805static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2806{
2807 if (sve_access_check(s)) {
2808 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2809 write_fp_dreg(s, a->rd, val);
2810 tcg_temp_free_i64(val);
2811 }
2812 return true;
2813}
2814
3a7be554 2815static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2816{
2817 return do_last_fp(s, a, false);
2818}
2819
3a7be554 2820static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2821{
2822 return do_last_fp(s, a, true);
2823}
2824
2825/* Compute LAST for a Xreg. */
2826static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2827{
2828 if (sve_access_check(s)) {
2829 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2830 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2831 tcg_temp_free_i64(val);
2832 }
2833 return true;
2834}
2835
3a7be554 2836static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2837{
2838 return do_last_general(s, a, false);
2839}
2840
3a7be554 2841static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2842{
2843 return do_last_general(s, a, true);
2844}
2845
3a7be554 2846static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2847{
2848 if (sve_access_check(s)) {
2849 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2850 }
2851 return true;
2852}
2853
3a7be554 2854static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2855{
2856 if (sve_access_check(s)) {
2857 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2858 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2859 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2860 tcg_temp_free_i64(t);
2861 }
2862 return true;
2863}
2864
3a7be554 2865static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2866{
2867 static gen_helper_gvec_3 * const fns[4] = {
2868 NULL,
2869 gen_helper_sve_revb_h,
2870 gen_helper_sve_revb_s,
2871 gen_helper_sve_revb_d,
2872 };
2873 return do_zpz_ool(s, a, fns[a->esz]);
2874}
2875
3a7be554 2876static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2877{
2878 static gen_helper_gvec_3 * const fns[4] = {
2879 NULL,
2880 NULL,
2881 gen_helper_sve_revh_s,
2882 gen_helper_sve_revh_d,
2883 };
2884 return do_zpz_ool(s, a, fns[a->esz]);
2885}
2886
3a7be554 2887static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2888{
2889 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2890}
2891
3a7be554 2892static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2893{
2894 static gen_helper_gvec_3 * const fns[4] = {
2895 gen_helper_sve_rbit_b,
2896 gen_helper_sve_rbit_h,
2897 gen_helper_sve_rbit_s,
2898 gen_helper_sve_rbit_d,
2899 };
2900 return do_zpz_ool(s, a, fns[a->esz]);
2901}
2902
3a7be554 2903static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
b48ff240
RH
2904{
2905 if (sve_access_check(s)) {
36cbb7a8 2906 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
dd701faf 2907 a->rd, a->rn, a->rm, a->pg, a->esz);
b48ff240
RH
2908 }
2909 return true;
2910}
2911
757f9cff
RH
2912/*
2913 *** SVE Integer Compare - Vectors Group
2914 */
2915
2916static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2917 gen_helper_gvec_flags_4 *gen_fn)
2918{
2919 TCGv_ptr pd, zn, zm, pg;
2920 unsigned vsz;
2921 TCGv_i32 t;
2922
2923 if (gen_fn == NULL) {
2924 return false;
2925 }
2926 if (!sve_access_check(s)) {
2927 return true;
2928 }
2929
2930 vsz = vec_full_reg_size(s);
2931 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2932 pd = tcg_temp_new_ptr();
2933 zn = tcg_temp_new_ptr();
2934 zm = tcg_temp_new_ptr();
2935 pg = tcg_temp_new_ptr();
2936
2937 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2938 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2939 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2940 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2941
2942 gen_fn(t, pd, zn, zm, pg, t);
2943
2944 tcg_temp_free_ptr(pd);
2945 tcg_temp_free_ptr(zn);
2946 tcg_temp_free_ptr(zm);
2947 tcg_temp_free_ptr(pg);
2948
2949 do_pred_flags(t);
2950
2951 tcg_temp_free_i32(t);
2952 return true;
2953}
2954
2955#define DO_PPZZ(NAME, name) \
3a7be554 2956static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2957{ \
2958 static gen_helper_gvec_flags_4 * const fns[4] = { \
2959 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2960 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2961 }; \
2962 return do_ppzz_flags(s, a, fns[a->esz]); \
2963}
2964
2965DO_PPZZ(CMPEQ, cmpeq)
2966DO_PPZZ(CMPNE, cmpne)
2967DO_PPZZ(CMPGT, cmpgt)
2968DO_PPZZ(CMPGE, cmpge)
2969DO_PPZZ(CMPHI, cmphi)
2970DO_PPZZ(CMPHS, cmphs)
2971
2972#undef DO_PPZZ
2973
2974#define DO_PPZW(NAME, name) \
3a7be554 2975static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2976{ \
2977 static gen_helper_gvec_flags_4 * const fns[4] = { \
2978 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2979 gen_helper_sve_##name##_ppzw_s, NULL \
2980 }; \
2981 return do_ppzz_flags(s, a, fns[a->esz]); \
2982}
2983
2984DO_PPZW(CMPEQ, cmpeq)
2985DO_PPZW(CMPNE, cmpne)
2986DO_PPZW(CMPGT, cmpgt)
2987DO_PPZW(CMPGE, cmpge)
2988DO_PPZW(CMPHI, cmphi)
2989DO_PPZW(CMPHS, cmphs)
2990DO_PPZW(CMPLT, cmplt)
2991DO_PPZW(CMPLE, cmple)
2992DO_PPZW(CMPLO, cmplo)
2993DO_PPZW(CMPLS, cmpls)
2994
2995#undef DO_PPZW
2996
38cadeba
RH
2997/*
2998 *** SVE Integer Compare - Immediate Groups
2999 */
3000
3001static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
3002 gen_helper_gvec_flags_3 *gen_fn)
3003{
3004 TCGv_ptr pd, zn, pg;
3005 unsigned vsz;
3006 TCGv_i32 t;
3007
3008 if (gen_fn == NULL) {
3009 return false;
3010 }
3011 if (!sve_access_check(s)) {
3012 return true;
3013 }
3014
3015 vsz = vec_full_reg_size(s);
3016 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
3017 pd = tcg_temp_new_ptr();
3018 zn = tcg_temp_new_ptr();
3019 pg = tcg_temp_new_ptr();
3020
3021 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
3022 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
3023 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3024
3025 gen_fn(t, pd, zn, pg, t);
3026
3027 tcg_temp_free_ptr(pd);
3028 tcg_temp_free_ptr(zn);
3029 tcg_temp_free_ptr(pg);
3030
3031 do_pred_flags(t);
3032
3033 tcg_temp_free_i32(t);
3034 return true;
3035}
3036
3037#define DO_PPZI(NAME, name) \
3a7be554 3038static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
38cadeba
RH
3039{ \
3040 static gen_helper_gvec_flags_3 * const fns[4] = { \
3041 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
3042 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
3043 }; \
3044 return do_ppzi_flags(s, a, fns[a->esz]); \
3045}
3046
3047DO_PPZI(CMPEQ, cmpeq)
3048DO_PPZI(CMPNE, cmpne)
3049DO_PPZI(CMPGT, cmpgt)
3050DO_PPZI(CMPGE, cmpge)
3051DO_PPZI(CMPHI, cmphi)
3052DO_PPZI(CMPHS, cmphs)
3053DO_PPZI(CMPLT, cmplt)
3054DO_PPZI(CMPLE, cmple)
3055DO_PPZI(CMPLO, cmplo)
3056DO_PPZI(CMPLS, cmpls)
3057
3058#undef DO_PPZI
3059
35da316f
RH
3060/*
3061 *** SVE Partition Break Group
3062 */
3063
3064static bool do_brk3(DisasContext *s, arg_rprr_s *a,
3065 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
3066{
3067 if (!sve_access_check(s)) {
3068 return true;
3069 }
3070
3071 unsigned vsz = pred_full_reg_size(s);
3072
3073 /* Predicate sizes may be smaller and cannot use simd_desc. */
3074 TCGv_ptr d = tcg_temp_new_ptr();
3075 TCGv_ptr n = tcg_temp_new_ptr();
3076 TCGv_ptr m = tcg_temp_new_ptr();
3077 TCGv_ptr g = tcg_temp_new_ptr();
04c774a2 3078 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3079
3080 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3081 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3082 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
3083 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3084
3085 if (a->s) {
3086 fn_s(t, d, n, m, g, t);
3087 do_pred_flags(t);
3088 } else {
3089 fn(d, n, m, g, t);
3090 }
3091 tcg_temp_free_ptr(d);
3092 tcg_temp_free_ptr(n);
3093 tcg_temp_free_ptr(m);
3094 tcg_temp_free_ptr(g);
3095 tcg_temp_free_i32(t);
3096 return true;
3097}
3098
3099static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3100 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3101{
3102 if (!sve_access_check(s)) {
3103 return true;
3104 }
3105
3106 unsigned vsz = pred_full_reg_size(s);
3107
3108 /* Predicate sizes may be smaller and cannot use simd_desc. */
3109 TCGv_ptr d = tcg_temp_new_ptr();
3110 TCGv_ptr n = tcg_temp_new_ptr();
3111 TCGv_ptr g = tcg_temp_new_ptr();
04c774a2 3112 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3113
3114 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3115 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3116 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3117
3118 if (a->s) {
3119 fn_s(t, d, n, g, t);
3120 do_pred_flags(t);
3121 } else {
3122 fn(d, n, g, t);
3123 }
3124 tcg_temp_free_ptr(d);
3125 tcg_temp_free_ptr(n);
3126 tcg_temp_free_ptr(g);
3127 tcg_temp_free_i32(t);
3128 return true;
3129}
3130
3a7be554 3131static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
35da316f
RH
3132{
3133 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
3134}
3135
3a7be554 3136static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
35da316f
RH
3137{
3138 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
3139}
3140
3a7be554 3141static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3142{
3143 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
3144}
3145
3a7be554 3146static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3147{
3148 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
3149}
3150
3a7be554 3151static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3152{
3153 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3154}
3155
3a7be554 3156static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3157{
3158 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3159}
3160
3a7be554 3161static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3162{
3163 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3164}
3165
9ee3a611
RH
3166/*
3167 *** SVE Predicate Count Group
3168 */
3169
3170static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3171{
3172 unsigned psz = pred_full_reg_size(s);
3173
3174 if (psz <= 8) {
3175 uint64_t psz_mask;
3176
3177 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3178 if (pn != pg) {
3179 TCGv_i64 g = tcg_temp_new_i64();
3180 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3181 tcg_gen_and_i64(val, val, g);
3182 tcg_temp_free_i64(g);
3183 }
3184
3185 /* Reduce the pred_esz_masks value simply to reduce the
3186 * size of the code generated here.
3187 */
3188 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3189 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3190
3191 tcg_gen_ctpop_i64(val, val);
3192 } else {
3193 TCGv_ptr t_pn = tcg_temp_new_ptr();
3194 TCGv_ptr t_pg = tcg_temp_new_ptr();
f556a201 3195 unsigned desc = 0;
9ee3a611
RH
3196 TCGv_i32 t_desc;
3197
f556a201
RH
3198 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
3199 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
9ee3a611
RH
3200
3201 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3202 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3203 t_desc = tcg_const_i32(desc);
3204
3205 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3206 tcg_temp_free_ptr(t_pn);
3207 tcg_temp_free_ptr(t_pg);
3208 tcg_temp_free_i32(t_desc);
3209 }
3210}
3211
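/*
 * Editorial note (illustration, not upstream text): in the psz <= 8 fast
 * path above the whole predicate fits in one i64, and pred_esz_masks[esz]
 * keeps only the flag bit of each element before the popcount.  E.g. for
 * esz = MO_32 the mask is 0x1111111111111111 (one bit out of every four
 * predicate bits), further truncated by psz_mask to the real predicate
 * width.
 */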
3a7be554 3212static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
3213{
3214 if (sve_access_check(s)) {
3215 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3216 }
3217 return true;
3218}
3219
3a7be554 3220static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3221{
3222 if (sve_access_check(s)) {
3223 TCGv_i64 reg = cpu_reg(s, a->rd);
3224 TCGv_i64 val = tcg_temp_new_i64();
3225
3226 do_cntp(s, val, a->esz, a->pg, a->pg);
3227 if (a->d) {
3228 tcg_gen_sub_i64(reg, reg, val);
3229 } else {
3230 tcg_gen_add_i64(reg, reg, val);
3231 }
3232 tcg_temp_free_i64(val);
3233 }
3234 return true;
3235}
3236
3a7be554 3237static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3238{
3239 if (a->esz == 0) {
3240 return false;
3241 }
3242 if (sve_access_check(s)) {
3243 unsigned vsz = vec_full_reg_size(s);
3244 TCGv_i64 val = tcg_temp_new_i64();
3245 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3246
3247 do_cntp(s, val, a->esz, a->pg, a->pg);
3248 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3249 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3250 }
3251 return true;
3252}
3253
3a7be554 3254static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3255{
3256 if (sve_access_check(s)) {
3257 TCGv_i64 reg = cpu_reg(s, a->rd);
3258 TCGv_i64 val = tcg_temp_new_i64();
3259
3260 do_cntp(s, val, a->esz, a->pg, a->pg);
3261 do_sat_addsub_32(reg, val, a->u, a->d);
3262 }
3263 return true;
3264}
3265
3a7be554 3266static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3267{
3268 if (sve_access_check(s)) {
3269 TCGv_i64 reg = cpu_reg(s, a->rd);
3270 TCGv_i64 val = tcg_temp_new_i64();
3271
3272 do_cntp(s, val, a->esz, a->pg, a->pg);
3273 do_sat_addsub_64(reg, val, a->u, a->d);
3274 }
3275 return true;
3276}
3277
3a7be554 3278static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3279{
3280 if (a->esz == 0) {
3281 return false;
3282 }
3283 if (sve_access_check(s)) {
3284 TCGv_i64 val = tcg_temp_new_i64();
3285 do_cntp(s, val, a->esz, a->pg, a->pg);
3286 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3287 }
3288 return true;
3289}
3290
caf1cefc
RH
3291/*
3292 *** SVE Integer Compare Scalars Group
3293 */
3294
3a7be554 3295static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3296{
3297 if (!sve_access_check(s)) {
3298 return true;
3299 }
3300
3301 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3302 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3303 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3304 TCGv_i64 cmp = tcg_temp_new_i64();
3305
3306 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3307 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3308 tcg_temp_free_i64(cmp);
3309
3310 /* VF = !NF & !CF. */
3311 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3312 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3313
3314 /* Both NF and VF actually look at bit 31. */
3315 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3316 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3317 return true;
3318}
3319
3a7be554 3320static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
caf1cefc 3321{
bbd0968c 3322 TCGv_i64 op0, op1, t0, t1, tmax;
caf1cefc
RH
3323 TCGv_i32 t2, t3;
3324 TCGv_ptr ptr;
e610906c
RH
3325 unsigned vsz = vec_full_reg_size(s);
3326 unsigned desc = 0;
caf1cefc 3327 TCGCond cond;
34688dbc
RH
3328 uint64_t maxval;
3329 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
3330 bool eq = a->eq == a->lt;
caf1cefc 3331
34688dbc
RH
3332 /* The greater-than conditions are all SVE2. */
3333 if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
3334 return false;
3335 }
bbd0968c
RH
3336 if (!sve_access_check(s)) {
3337 return true;
3338 }
3339
3340 op0 = read_cpu_reg(s, a->rn, 1);
3341 op1 = read_cpu_reg(s, a->rm, 1);
3342
caf1cefc
RH
3343 if (!a->sf) {
3344 if (a->u) {
3345 tcg_gen_ext32u_i64(op0, op0);
3346 tcg_gen_ext32u_i64(op1, op1);
3347 } else {
3348 tcg_gen_ext32s_i64(op0, op0);
3349 tcg_gen_ext32s_i64(op1, op1);
3350 }
3351 }
3352
3353 /* For the helper, compress the different conditions into a computation
3354 * of the number of iterations for which the condition is true.
caf1cefc 3355 */
bbd0968c
RH
3356 t0 = tcg_temp_new_i64();
3357 t1 = tcg_temp_new_i64();
34688dbc
RH
3358
3359 if (a->lt) {
3360 tcg_gen_sub_i64(t0, op1, op0);
3361 if (a->u) {
3362 maxval = a->sf ? UINT64_MAX : UINT32_MAX;
3363 cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
3364 } else {
3365 maxval = a->sf ? INT64_MAX : INT32_MAX;
3366 cond = eq ? TCG_COND_LE : TCG_COND_LT;
3367 }
3368 } else {
3369 tcg_gen_sub_i64(t0, op0, op1);
3370 if (a->u) {
3371 maxval = 0;
3372 cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
3373 } else {
3374 maxval = a->sf ? INT64_MIN : INT32_MIN;
3375 cond = eq ? TCG_COND_GE : TCG_COND_GT;
3376 }
3377 }
caf1cefc 3378
bbd0968c 3379 tmax = tcg_const_i64(vsz >> a->esz);
34688dbc 3380 if (eq) {
caf1cefc
RH
3381 /* Equality means one more iteration. */
3382 tcg_gen_addi_i64(t0, t0, 1);
bbd0968c 3383
34688dbc
RH
3384 /*
3385 * For the less-than while, if op1 is maxval (and the only time
3386 * the addition above could overflow), then we produce an all-true
3387 * predicate by setting the count to the vector length. This is
3388 * because the pseudocode is described as an increment + compare
3389 * loop, and the maximum integer would always compare true.
3390 * Similarly, the greater-than while has the same issue with the
3391 * minimum integer due to the decrement + compare loop.
bbd0968c 3392 */
34688dbc 3393 tcg_gen_movi_i64(t1, maxval);
bbd0968c 3394 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
caf1cefc
RH
3395 }
3396
bbd0968c
RH
3397 /* Bound to the maximum. */
3398 tcg_gen_umin_i64(t0, t0, tmax);
3399 tcg_temp_free_i64(tmax);
3400
3401 /* Set the count to zero if the condition is false. */
caf1cefc
RH
3402 tcg_gen_movi_i64(t1, 0);
3403 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
bbd0968c 3404 tcg_temp_free_i64(t1);
caf1cefc 3405
bbd0968c 3406 /* Since we're bounded, pass as a 32-bit type. */
caf1cefc
RH
3407 t2 = tcg_temp_new_i32();
3408 tcg_gen_extrl_i64_i32(t2, t0);
3409 tcg_temp_free_i64(t0);
bbd0968c
RH
3410
3411 /* Scale elements to bits. */
3412 tcg_gen_shli_i32(t2, t2, a->esz);
caf1cefc 3413
e610906c
RH
3414 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3415 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
caf1cefc
RH
3416 t3 = tcg_const_i32(desc);
3417
3418 ptr = tcg_temp_new_ptr();
3419 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3420
34688dbc
RH
3421 if (a->lt) {
3422 gen_helper_sve_whilel(t2, ptr, t2, t3);
3423 } else {
3424 gen_helper_sve_whileg(t2, ptr, t2, t3);
3425 }
caf1cefc
RH
3426 do_pred_flags(t2);
3427
3428 tcg_temp_free_ptr(ptr);
3429 tcg_temp_free_i32(t2);
3430 tcg_temp_free_i32(t3);
3431 return true;
3432}
3433
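/*
 * Editorial note (illustration, not upstream text): the value bounded
 * above is simply the number of elements for which the scalar condition
 * holds.  E.g. "WHILELO P0.S, X0, X1" with X0 = 5, X1 = 9 and a 128-bit
 * vector gives t0 = 9 - 5 = 4 with tmax = 4, so all four .S predicate
 * elements are set; with X0 = 7 only the first two are set.
 */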
14f6dad1
RH
3434static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3435{
3436 TCGv_i64 op0, op1, diff, t1, tmax;
3437 TCGv_i32 t2, t3;
3438 TCGv_ptr ptr;
3439 unsigned vsz = vec_full_reg_size(s);
3440 unsigned desc = 0;
3441
3442 if (!dc_isar_feature(aa64_sve2, s)) {
3443 return false;
3444 }
3445 if (!sve_access_check(s)) {
3446 return true;
3447 }
3448
3449 op0 = read_cpu_reg(s, a->rn, 1);
3450 op1 = read_cpu_reg(s, a->rm, 1);
3451
3452 tmax = tcg_const_i64(vsz);
3453 diff = tcg_temp_new_i64();
3454
3455 if (a->rw) {
3456 /* WHILERW */
3457 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3458 t1 = tcg_temp_new_i64();
3459 tcg_gen_sub_i64(diff, op0, op1);
3460 tcg_gen_sub_i64(t1, op1, op0);
3461 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3462 tcg_temp_free_i64(t1);
3463 /* Round down to a multiple of ESIZE. */
3464 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3465 /* If op1 == op0, diff == 0, and the condition is always true. */
3466 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3467 } else {
3468 /* WHILEWR */
3469 tcg_gen_sub_i64(diff, op1, op0);
3470 /* Round down to a multiple of ESIZE. */
3471 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3472 /* If op0 >= op1, diff <= 0, the condition is always true. */
3473 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3474 }
3475
3476 /* Bound to the maximum. */
3477 tcg_gen_umin_i64(diff, diff, tmax);
3478 tcg_temp_free_i64(tmax);
3479
3480 /* Since we're bounded, pass as a 32-bit type. */
3481 t2 = tcg_temp_new_i32();
3482 tcg_gen_extrl_i64_i32(t2, diff);
3483 tcg_temp_free_i64(diff);
3484
3485 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3486 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3487 t3 = tcg_const_i32(desc);
3488
3489 ptr = tcg_temp_new_ptr();
3490 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3491
3492 gen_helper_sve_whilel(t2, ptr, t2, t3);
3493 do_pred_flags(t2);
3494
3495 tcg_temp_free_ptr(ptr);
3496 tcg_temp_free_i32(t2);
3497 tcg_temp_free_i32(t3);
3498 return true;
3499}
3500
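/*
 * Editorial note (illustration, not upstream text): for WHILEWR the count
 * is the distance between the two pointers rounded down to whole
 * elements, so "WHILEWR P0.S, X0, X1" with X1 - X0 = 6 yields
 * diff = 6 & ~3 = 4 and only the first .S element is set; equal pointers
 * take the movcond path and produce an all-true predicate.
 */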
ed491961
RH
3501/*
3502 *** SVE Integer Wide Immediate - Unpredicated Group
3503 */
3504
3a7be554 3505static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3506{
3507 if (a->esz == 0) {
3508 return false;
3509 }
3510 if (sve_access_check(s)) {
3511 unsigned vsz = vec_full_reg_size(s);
3512 int dofs = vec_full_reg_offset(s, a->rd);
3513 uint64_t imm;
3514
3515 /* Decode the VFP immediate. */
3516 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3517 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3518 }
3519 return true;
3520}
3521
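/*
 * Editorial note (illustration, not upstream text): vfp_expand_imm()
 * expands the standard 8-bit AArch64 FP immediate, e.g. imm8 = 0x70 is
 * 1.0 in every element size, so "FDUP Z0.D, #1.0" splats
 * 0x3ff0000000000000 across the vector with a single dup_imm.
 */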
3a7be554 3522static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3523{
3a7be554 3524 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
ed491961
RH
3525 return false;
3526 }
3527 if (sve_access_check(s)) {
3528 unsigned vsz = vec_full_reg_size(s);
3529 int dofs = vec_full_reg_offset(s, a->rd);
3530
8711e71f 3531 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3532 }
3533 return true;
3534}
3535
3a7be554 3536static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3537{
3a7be554 3538 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3539 return false;
3540 }
3541 if (sve_access_check(s)) {
3542 unsigned vsz = vec_full_reg_size(s);
3543 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3544 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3545 }
3546 return true;
3547}
3548
3a7be554 3549static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3550{
3551 a->imm = -a->imm;
3a7be554 3552 return trans_ADD_zzi(s, a);
6e6a157d
RH
3553}
3554
3a7be554 3555static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3556{
53229a77 3557 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
6e6a157d
RH
3558 static const GVecGen2s op[4] = {
3559 { .fni8 = tcg_gen_vec_sub8_i64,
3560 .fniv = tcg_gen_sub_vec,
3561 .fno = gen_helper_sve_subri_b,
53229a77 3562 .opt_opc = vecop_list,
6e6a157d
RH
3563 .vece = MO_8,
3564 .scalar_first = true },
3565 { .fni8 = tcg_gen_vec_sub16_i64,
3566 .fniv = tcg_gen_sub_vec,
3567 .fno = gen_helper_sve_subri_h,
53229a77 3568 .opt_opc = vecop_list,
6e6a157d
RH
3569 .vece = MO_16,
3570 .scalar_first = true },
3571 { .fni4 = tcg_gen_sub_i32,
3572 .fniv = tcg_gen_sub_vec,
3573 .fno = gen_helper_sve_subri_s,
53229a77 3574 .opt_opc = vecop_list,
6e6a157d
RH
3575 .vece = MO_32,
3576 .scalar_first = true },
3577 { .fni8 = tcg_gen_sub_i64,
3578 .fniv = tcg_gen_sub_vec,
3579 .fno = gen_helper_sve_subri_d,
53229a77 3580 .opt_opc = vecop_list,
6e6a157d
RH
3581 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3582 .vece = MO_64,
3583 .scalar_first = true }
3584 };
3585
3a7be554 3586 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3587 return false;
3588 }
3589 if (sve_access_check(s)) {
3590 unsigned vsz = vec_full_reg_size(s);
3591 TCGv_i64 c = tcg_const_i64(a->imm);
3592 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3593 vec_full_reg_offset(s, a->rn),
3594 vsz, vsz, c, &op[a->esz]);
3595 tcg_temp_free_i64(c);
3596 }
3597 return true;
3598}
3599
3a7be554 3600static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3601{
3602 if (sve_access_check(s)) {
3603 unsigned vsz = vec_full_reg_size(s);
3604 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3605 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3606 }
3607 return true;
3608}
3609
3a7be554 3610static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3611{
3a7be554 3612 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3613 return false;
3614 }
3615 if (sve_access_check(s)) {
3616 TCGv_i64 val = tcg_const_i64(a->imm);
3617 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3618 tcg_temp_free_i64(val);
3619 }
3620 return true;
3621}
3622
3a7be554 3623static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3624{
3a7be554 3625 return do_zzi_sat(s, a, false, false);
6e6a157d
RH
3626}
3627
3a7be554 3628static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3629{
3a7be554 3630 return do_zzi_sat(s, a, true, false);
6e6a157d
RH
3631}
3632
3a7be554 3633static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3634{
3a7be554 3635 return do_zzi_sat(s, a, false, true);
6e6a157d
RH
3636}
3637
3a7be554 3638static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3639{
3a7be554 3640 return do_zzi_sat(s, a, true, true);
6e6a157d
RH
3641}
3642
3643static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3644{
3645 if (sve_access_check(s)) {
3646 unsigned vsz = vec_full_reg_size(s);
3647 TCGv_i64 c = tcg_const_i64(a->imm);
3648
3649 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3650 vec_full_reg_offset(s, a->rn),
3651 c, vsz, vsz, 0, fn);
3652 tcg_temp_free_i64(c);
3653 }
3654 return true;
3655}
3656
3657#define DO_ZZI(NAME, name) \
3a7be554 3658static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
6e6a157d
RH
3659{ \
3660 static gen_helper_gvec_2i * const fns[4] = { \
3661 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3662 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3663 }; \
3664 return do_zzi_ool(s, a, fns[a->esz]); \
3665}
3666
3667DO_ZZI(SMAX, smax)
3668DO_ZZI(UMAX, umax)
3669DO_ZZI(SMIN, smin)
3670DO_ZZI(UMIN, umin)
3671
3672#undef DO_ZZI
3673
3a7be554 3674static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
d730ecaa
RH
3675{
3676 static gen_helper_gvec_3 * const fns[2][2] = {
3677 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3678 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3679 };
3680
3681 if (sve_access_check(s)) {
e645d1a1 3682 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0);
d730ecaa
RH
3683 }
3684 return true;
3685}
3686
3a7be554 3687static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
16fcfdc7
RH
3688{
3689 static gen_helper_gvec_3 * const fns[2][2] = {
3690 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3691 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3692 };
3693
3694 if (sve_access_check(s)) {
e645d1a1 3695 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index);
16fcfdc7
RH
3696 }
3697 return true;
3698}
3699
3700
ca40a6e6
RH
3701/*
3702 *** SVE Floating Point Multiply-Add Indexed Group
3703 */
3704
3a7be554 3705static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
ca40a6e6
RH
3706{
3707 static gen_helper_gvec_4_ptr * const fns[3] = {
3708 gen_helper_gvec_fmla_idx_h,
3709 gen_helper_gvec_fmla_idx_s,
3710 gen_helper_gvec_fmla_idx_d,
3711 };
3712
3713 if (sve_access_check(s)) {
3714 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3715 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3716 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3717 vec_full_reg_offset(s, a->rn),
3718 vec_full_reg_offset(s, a->rm),
3719 vec_full_reg_offset(s, a->ra),
3720 status, vsz, vsz, (a->index << 1) | a->sub,
3721 fns[a->esz - 1]);
3722 tcg_temp_free_ptr(status);
3723 }
3724 return true;
3725}
3726
3727/*
3728 *** SVE Floating Point Multiply Indexed Group
3729 */
3730
3a7be554 3731static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
ca40a6e6
RH
3732{
3733 static gen_helper_gvec_3_ptr * const fns[3] = {
3734 gen_helper_gvec_fmul_idx_h,
3735 gen_helper_gvec_fmul_idx_s,
3736 gen_helper_gvec_fmul_idx_d,
3737 };
3738
3739 if (sve_access_check(s)) {
3740 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3741 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3742 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3743 vec_full_reg_offset(s, a->rn),
3744 vec_full_reg_offset(s, a->rm),
3745 status, vsz, vsz, a->index, fns[a->esz - 1]);
3746 tcg_temp_free_ptr(status);
3747 }
3748 return true;
3749}
3750
23fbe79f
RH
3751/*
3752 *** SVE Floating Point Fast Reduction Group
3753 */
3754
3755typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3756 TCGv_ptr, TCGv_i32);
3757
3758static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3759 gen_helper_fp_reduce *fn)
3760{
3761 unsigned vsz = vec_full_reg_size(s);
3762 unsigned p2vsz = pow2ceil(vsz);
c648c9b7 3763 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
23fbe79f
RH
3764 TCGv_ptr t_zn, t_pg, status;
3765 TCGv_i64 temp;
3766
3767 temp = tcg_temp_new_i64();
3768 t_zn = tcg_temp_new_ptr();
3769 t_pg = tcg_temp_new_ptr();
3770
3771 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3772 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3773 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
23fbe79f
RH
3774
3775 fn(temp, t_zn, t_pg, status, t_desc);
3776 tcg_temp_free_ptr(t_zn);
3777 tcg_temp_free_ptr(t_pg);
3778 tcg_temp_free_ptr(status);
3779 tcg_temp_free_i32(t_desc);
3780
3781 write_fp_dreg(s, a->rd, temp);
3782 tcg_temp_free_i64(temp);
3783}
3784
3785#define DO_VPZ(NAME, name) \
3a7be554 3786static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
23fbe79f
RH
3787{ \
3788 static gen_helper_fp_reduce * const fns[3] = { \
3789 gen_helper_sve_##name##_h, \
3790 gen_helper_sve_##name##_s, \
3791 gen_helper_sve_##name##_d, \
3792 }; \
3793 if (a->esz == 0) { \
3794 return false; \
3795 } \
3796 if (sve_access_check(s)) { \
3797 do_reduce(s, a, fns[a->esz - 1]); \
3798 } \
3799 return true; \
3800}
3801
3802DO_VPZ(FADDV, faddv)
3803DO_VPZ(FMINNMV, fminnmv)
3804DO_VPZ(FMAXNMV, fmaxnmv)
3805DO_VPZ(FMINV, fminv)
3806DO_VPZ(FMAXV, fmaxv)
3807
3887c038
RH
3808/*
3809 *** SVE Floating Point Unary Operations - Unpredicated Group
3810 */
3811
3812static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3813{
3814 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3815 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
3816
3817 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3818 vec_full_reg_offset(s, a->rn),
3819 status, vsz, vsz, 0, fn);
3820 tcg_temp_free_ptr(status);
3821}
3822
3a7be554 3823static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3824{
3825 static gen_helper_gvec_2_ptr * const fns[3] = {
3826 gen_helper_gvec_frecpe_h,
3827 gen_helper_gvec_frecpe_s,
3828 gen_helper_gvec_frecpe_d,
3829 };
3830 if (a->esz == 0) {
3831 return false;
3832 }
3833 if (sve_access_check(s)) {
3834 do_zz_fp(s, a, fns[a->esz - 1]);
3835 }
3836 return true;
3837}
3838
3a7be554 3839static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3840{
3841 static gen_helper_gvec_2_ptr * const fns[3] = {
3842 gen_helper_gvec_frsqrte_h,
3843 gen_helper_gvec_frsqrte_s,
3844 gen_helper_gvec_frsqrte_d,
3845 };
3846 if (a->esz == 0) {
3847 return false;
3848 }
3849 if (sve_access_check(s)) {
3850 do_zz_fp(s, a, fns[a->esz - 1]);
3851 }
3852 return true;
3853}
3854
4d2e2a03
RH
3855/*
3856 *** SVE Floating Point Compare with Zero Group
3857 */
3858
3859static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3860 gen_helper_gvec_3_ptr *fn)
3861{
3862 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3863 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
3864
3865 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3866 vec_full_reg_offset(s, a->rn),
3867 pred_full_reg_offset(s, a->pg),
3868 status, vsz, vsz, 0, fn);
3869 tcg_temp_free_ptr(status);
3870}
3871
3872#define DO_PPZ(NAME, name) \
3a7be554 3873static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
4d2e2a03
RH
3874{ \
3875 static gen_helper_gvec_3_ptr * const fns[3] = { \
3876 gen_helper_sve_##name##_h, \
3877 gen_helper_sve_##name##_s, \
3878 gen_helper_sve_##name##_d, \
3879 }; \
3880 if (a->esz == 0) { \
3881 return false; \
3882 } \
3883 if (sve_access_check(s)) { \
3884 do_ppz_fp(s, a, fns[a->esz - 1]); \
3885 } \
3886 return true; \
3887}
3888
3889DO_PPZ(FCMGE_ppz0, fcmge0)
3890DO_PPZ(FCMGT_ppz0, fcmgt0)
3891DO_PPZ(FCMLE_ppz0, fcmle0)
3892DO_PPZ(FCMLT_ppz0, fcmlt0)
3893DO_PPZ(FCMEQ_ppz0, fcmeq0)
3894DO_PPZ(FCMNE_ppz0, fcmne0)
3895
3896#undef DO_PPZ
3897
67fcd9ad
RH
3898/*
3899 *** SVE floating-point trig multiply-add coefficient
3900 */
3901
3a7be554 3902static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
3903{
3904 static gen_helper_gvec_3_ptr * const fns[3] = {
3905 gen_helper_sve_ftmad_h,
3906 gen_helper_sve_ftmad_s,
3907 gen_helper_sve_ftmad_d,
3908 };
3909
3910 if (a->esz == 0) {
3911 return false;
3912 }
3913 if (sve_access_check(s)) {
3914 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3915 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
3916 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3917 vec_full_reg_offset(s, a->rn),
3918 vec_full_reg_offset(s, a->rm),
3919 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3920 tcg_temp_free_ptr(status);
3921 }
3922 return true;
3923}
3924
7f9ddf64
RH
3925/*
3926 *** SVE Floating Point Accumulating Reduction Group
3927 */
3928
3a7be554 3929static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
3930{
3931 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3932 TCGv_ptr, TCGv_ptr, TCGv_i32);
3933 static fadda_fn * const fns[3] = {
3934 gen_helper_sve_fadda_h,
3935 gen_helper_sve_fadda_s,
3936 gen_helper_sve_fadda_d,
3937 };
3938 unsigned vsz = vec_full_reg_size(s);
3939 TCGv_ptr t_rm, t_pg, t_fpst;
3940 TCGv_i64 t_val;
3941 TCGv_i32 t_desc;
3942
3943 if (a->esz == 0) {
3944 return false;
3945 }
3946 if (!sve_access_check(s)) {
3947 return true;
3948 }
3949
3950 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3951 t_rm = tcg_temp_new_ptr();
3952 t_pg = tcg_temp_new_ptr();
3953 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3954 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3955 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7f9ddf64
RH
3956 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3957
3958 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3959
3960 tcg_temp_free_i32(t_desc);
3961 tcg_temp_free_ptr(t_fpst);
3962 tcg_temp_free_ptr(t_pg);
3963 tcg_temp_free_ptr(t_rm);
3964
3965 write_fp_dreg(s, a->rd, t_val);
3966 tcg_temp_free_i64(t_val);
3967 return true;
3968}
3969
29b80469
RH
3970/*
3971 *** SVE Floating Point Arithmetic - Unpredicated Group
3972 */
3973
3974static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3975 gen_helper_gvec_3_ptr *fn)
3976{
3977 if (fn == NULL) {
3978 return false;
3979 }
3980 if (sve_access_check(s)) {
3981 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3982 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
3983 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3984 vec_full_reg_offset(s, a->rn),
3985 vec_full_reg_offset(s, a->rm),
3986 status, vsz, vsz, 0, fn);
3987 tcg_temp_free_ptr(status);
3988 }
3989 return true;
3990}
3991
3992
3993#define DO_FP3(NAME, name) \
3a7be554 3994static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
3995{ \
3996 static gen_helper_gvec_3_ptr * const fns[4] = { \
3997 NULL, gen_helper_gvec_##name##_h, \
3998 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3999 }; \
4000 return do_zzz_fp(s, a, fns[a->esz]); \
4001}
4002
4003DO_FP3(FADD_zzz, fadd)
4004DO_FP3(FSUB_zzz, fsub)
4005DO_FP3(FMUL_zzz, fmul)
4006DO_FP3(FTSMUL, ftsmul)
4007DO_FP3(FRECPS, recps)
4008DO_FP3(FRSQRTS, rsqrts)
4009
4010#undef DO_FP3
4011
ec3b87c2
RH
4012/*
4013 *** SVE Floating Point Arithmetic - Predicated Group
4014 */
4015
4016static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
4017 gen_helper_gvec_4_ptr *fn)
4018{
4019 if (fn == NULL) {
4020 return false;
4021 }
4022 if (sve_access_check(s)) {
4023 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4024 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
4025 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4026 vec_full_reg_offset(s, a->rn),
4027 vec_full_reg_offset(s, a->rm),
4028 pred_full_reg_offset(s, a->pg),
4029 status, vsz, vsz, 0, fn);
4030 tcg_temp_free_ptr(status);
4031 }
4032 return true;
4033}
4034
4035#define DO_FP3(NAME, name) \
3a7be554 4036static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
4037{ \
4038 static gen_helper_gvec_4_ptr * const fns[4] = { \
4039 NULL, gen_helper_sve_##name##_h, \
4040 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4041 }; \
4042 return do_zpzz_fp(s, a, fns[a->esz]); \
4043}
4044
4045DO_FP3(FADD_zpzz, fadd)
4046DO_FP3(FSUB_zpzz, fsub)
4047DO_FP3(FMUL_zpzz, fmul)
4048DO_FP3(FMIN_zpzz, fmin)
4049DO_FP3(FMAX_zpzz, fmax)
4050DO_FP3(FMINNM_zpzz, fminnum)
4051DO_FP3(FMAXNM_zpzz, fmaxnum)
4052DO_FP3(FABD, fabd)
4053DO_FP3(FSCALE, fscalbn)
4054DO_FP3(FDIV, fdiv)
4055DO_FP3(FMULX, fmulx)
4056
4057#undef DO_FP3
8092c6a3 4058
cc48affe
RH
4059typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
4060 TCGv_i64, TCGv_ptr, TCGv_i32);
4061
4062static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
4063 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
4064{
4065 unsigned vsz = vec_full_reg_size(s);
4066 TCGv_ptr t_zd, t_zn, t_pg, status;
4067 TCGv_i32 desc;
4068
4069 t_zd = tcg_temp_new_ptr();
4070 t_zn = tcg_temp_new_ptr();
4071 t_pg = tcg_temp_new_ptr();
4072 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
4073 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
4074 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4075
cdfb22bb 4076 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
cc48affe
RH
4077 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
4078 fn(t_zd, t_zn, t_pg, scalar, status, desc);
4079
4080 tcg_temp_free_i32(desc);
4081 tcg_temp_free_ptr(status);
4082 tcg_temp_free_ptr(t_pg);
4083 tcg_temp_free_ptr(t_zn);
4084 tcg_temp_free_ptr(t_zd);
4085}
4086
4087static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4088 gen_helper_sve_fp2scalar *fn)
4089{
4090 TCGv_i64 temp = tcg_const_i64(imm);
4091 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
4092 tcg_temp_free_i64(temp);
4093}
4094
4095#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 4096static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
4097{ \
4098 static gen_helper_sve_fp2scalar * const fns[3] = { \
4099 gen_helper_sve_##name##_h, \
4100 gen_helper_sve_##name##_s, \
4101 gen_helper_sve_##name##_d \
4102 }; \
4103 static uint64_t const val[3][2] = { \
4104 { float16_##const0, float16_##const1 }, \
4105 { float32_##const0, float32_##const1 }, \
4106 { float64_##const0, float64_##const1 }, \
4107 }; \
4108 if (a->esz == 0) { \
4109 return false; \
4110 } \
4111 if (sve_access_check(s)) { \
4112 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
4113 } \
4114 return true; \
4115}
4116
cc48affe
RH
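/*
 * The (const0, const1) pair gives the two values selectable by the
 * instruction's single immediate bit, e.g. FADD takes #0.5 or #1.0
 * and FMUL takes #0.5 or #2.0.
 */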
4117DO_FP_IMM(FADD, fadds, half, one)
4118DO_FP_IMM(FSUB, fsubs, half, one)
4119DO_FP_IMM(FMUL, fmuls, half, two)
4120DO_FP_IMM(FSUBR, fsubrs, half, one)
4121DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
4122DO_FP_IMM(FMINNM, fminnms, zero, one)
4123DO_FP_IMM(FMAX, fmaxs, zero, one)
4124DO_FP_IMM(FMIN, fmins, zero, one)
4125
4126#undef DO_FP_IMM
4127
abfdefd5
RH
4128static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4129 gen_helper_gvec_4_ptr *fn)
4130{
4131 if (fn == NULL) {
4132 return false;
4133 }
4134 if (sve_access_check(s)) {
4135 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4136 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4137 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4138 vec_full_reg_offset(s, a->rn),
4139 vec_full_reg_offset(s, a->rm),
4140 pred_full_reg_offset(s, a->pg),
4141 status, vsz, vsz, 0, fn);
4142 tcg_temp_free_ptr(status);
4143 }
4144 return true;
4145}
4146
4147#define DO_FPCMP(NAME, name) \
3a7be554 4148static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
4149{ \
4150 static gen_helper_gvec_4_ptr * const fns[4] = { \
4151 NULL, gen_helper_sve_##name##_h, \
4152 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4153 }; \
4154 return do_fp_cmp(s, a, fns[a->esz]); \
4155}
4156
4157DO_FPCMP(FCMGE, fcmge)
4158DO_FPCMP(FCMGT, fcmgt)
4159DO_FPCMP(FCMEQ, fcmeq)
4160DO_FPCMP(FCMNE, fcmne)
4161DO_FPCMP(FCMUO, fcmuo)
4162DO_FPCMP(FACGE, facge)
4163DO_FPCMP(FACGT, facgt)
4164
4165#undef DO_FPCMP
4166
3a7be554 4167static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
4168{
4169 static gen_helper_gvec_4_ptr * const fns[3] = {
4170 gen_helper_sve_fcadd_h,
4171 gen_helper_sve_fcadd_s,
4172 gen_helper_sve_fcadd_d
4173 };
4174
4175 if (a->esz == 0) {
4176 return false;
4177 }
4178 if (sve_access_check(s)) {
4179 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4180 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
4181 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4182 vec_full_reg_offset(s, a->rn),
4183 vec_full_reg_offset(s, a->rm),
4184 pred_full_reg_offset(s, a->pg),
4185 status, vsz, vsz, a->rot, fns[a->esz - 1]);
4186 tcg_temp_free_ptr(status);
4187 }
4188 return true;
4189}
4190
08975da9
RH
4191static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
4192 gen_helper_gvec_5_ptr *fn)
6ceabaad 4193{
08975da9 4194 if (a->esz == 0) {
6ceabaad
RH
4195 return false;
4196 }
08975da9
RH
4197 if (sve_access_check(s)) {
4198 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4199 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4200 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4201 vec_full_reg_offset(s, a->rn),
4202 vec_full_reg_offset(s, a->rm),
4203 vec_full_reg_offset(s, a->ra),
4204 pred_full_reg_offset(s, a->pg),
4205 status, vsz, vsz, 0, fn);
4206 tcg_temp_free_ptr(status);
6ceabaad 4207 }
6ceabaad
RH
4208 return true;
4209}
4210
4211#define DO_FMLA(NAME, name) \
3a7be554 4212static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 4213{ \
08975da9 4214 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
4215 NULL, gen_helper_sve_##name##_h, \
4216 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4217 }; \
4218 return do_fmla(s, a, fns[a->esz]); \
4219}
4220
4221DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4222DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4223DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4224DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4225
4226#undef DO_FMLA
4227
3a7be554 4228static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 4229{
08975da9
RH
4230 static gen_helper_gvec_5_ptr * const fns[4] = {
4231 NULL,
05f48bab
RH
4232 gen_helper_sve_fcmla_zpzzz_h,
4233 gen_helper_sve_fcmla_zpzzz_s,
4234 gen_helper_sve_fcmla_zpzzz_d,
4235 };
4236
4237 if (a->esz == 0) {
4238 return false;
4239 }
4240 if (sve_access_check(s)) {
4241 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4242 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4243 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4244 vec_full_reg_offset(s, a->rn),
4245 vec_full_reg_offset(s, a->rm),
4246 vec_full_reg_offset(s, a->ra),
4247 pred_full_reg_offset(s, a->pg),
4248 status, vsz, vsz, a->rot, fns[a->esz]);
4249 tcg_temp_free_ptr(status);
05f48bab
RH
4250 }
4251 return true;
4252}
4253
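/*
 * Indexed FCMLA exists only for half and single precision.  The element
 * index and rotation are packed together as (index * 4 + rot) in the
 * descriptor data, as expected by the gvec_fcmla*_idx helpers shared
 * with the AdvSIMD (by element) implementation.
 */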
3a7be554 4254static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405
RH
4255{
4256 static gen_helper_gvec_3_ptr * const fns[2] = {
4257 gen_helper_gvec_fcmlah_idx,
4258 gen_helper_gvec_fcmlas_idx,
4259 };
4260
4261 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4262 tcg_debug_assert(a->rd == a->ra);
4263 if (sve_access_check(s)) {
4264 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4265 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
18fc2405
RH
4266 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4267 vec_full_reg_offset(s, a->rn),
4268 vec_full_reg_offset(s, a->rm),
4269 status, vsz, vsz,
4270 a->index * 4 + a->rot,
4271 fns[a->esz - 1]);
4272 tcg_temp_free_ptr(status);
4273 }
4274 return true;
4275}
4276
8092c6a3
RH
4277/*
4278 *** SVE Floating Point Unary Operations Predicated Group
4279 */
4280
4281static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4282 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4283{
4284 if (sve_access_check(s)) {
4285 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4286 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
4287 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4288 vec_full_reg_offset(s, rn),
4289 pred_full_reg_offset(s, pg),
4290 status, vsz, vsz, 0, fn);
4291 tcg_temp_free_ptr(status);
4292 }
4293 return true;
4294}
4295
3a7be554 4296static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4297{
e4ab5124 4298 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
4299}
4300
3a7be554 4301static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4302{
4303 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4304}
4305
3a7be554 4306static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4307{
e4ab5124 4308 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
4309}
4310
3a7be554 4311static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4312{
4313 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4314}
4315
3a7be554 4316static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4317{
4318 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4319}
4320
3a7be554 4321static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4322{
4323 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4324}
4325
3a7be554 4326static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4327{
4328 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4329}
4330
3a7be554 4331static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4332{
4333 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4334}
4335
3a7be554 4336static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4337{
4338 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4339}
4340
3a7be554 4341static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4342{
4343 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4344}
4345
3a7be554 4346static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4347{
4348 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4349}
4350
3a7be554 4351static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4352{
4353 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4354}
4355
3a7be554 4356static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4357{
4358 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4359}
4360
3a7be554 4361static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4362{
4363 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4364}
4365
3a7be554 4366static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4367{
4368 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4369}
4370
3a7be554 4371static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4372{
4373 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4374}
4375
3a7be554 4376static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4377{
4378 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4379}
4380
3a7be554 4381static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4382{
4383 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4384}
4385
3a7be554 4386static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4387{
4388 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4389}
4390
3a7be554 4391static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4392{
4393 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4394}
4395
cda3c753
RH
4396static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4397 gen_helper_sve_frint_h,
4398 gen_helper_sve_frint_s,
4399 gen_helper_sve_frint_d
4400};
4401
3a7be554 4402static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4403{
4404 if (a->esz == 0) {
4405 return false;
4406 }
4407 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4408 frint_fns[a->esz - 1]);
4409}
4410
3a7be554 4411static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4412{
4413 static gen_helper_gvec_3_ptr * const fns[3] = {
4414 gen_helper_sve_frintx_h,
4415 gen_helper_sve_frintx_s,
4416 gen_helper_sve_frintx_d
4417 };
4418 if (a->esz == 0) {
4419 return false;
4420 }
4421 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4422}
4423
4424static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4425{
4426 if (a->esz == 0) {
4427 return false;
4428 }
4429 if (sve_access_check(s)) {
4430 unsigned vsz = vec_full_reg_size(s);
4431 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4432 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4433
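        /*
         * set_rmode installs the requested rounding mode and writes the
         * previous mode back into tmode, so the second call below
         * restores the original mode after the operation.
         */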
4434 gen_helper_set_rmode(tmode, tmode, status);
4435
4436 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4437 vec_full_reg_offset(s, a->rn),
4438 pred_full_reg_offset(s, a->pg),
4439 status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4440
4441 gen_helper_set_rmode(tmode, tmode, status);
4442 tcg_temp_free_i32(tmode);
4443 tcg_temp_free_ptr(status);
4444 }
4445 return true;
4446}
4447
3a7be554 4448static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4449{
4450 return do_frint_mode(s, a, float_round_nearest_even);
4451}
4452
3a7be554 4453static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4454{
4455 return do_frint_mode(s, a, float_round_up);
4456}
4457
3a7be554 4458static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4459{
4460 return do_frint_mode(s, a, float_round_down);
4461}
4462
3a7be554 4463static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4464{
4465 return do_frint_mode(s, a, float_round_to_zero);
4466}
4467
3a7be554 4468static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4469{
4470 return do_frint_mode(s, a, float_round_ties_away);
4471}
4472
3a7be554 4473static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4474{
4475 static gen_helper_gvec_3_ptr * const fns[3] = {
4476 gen_helper_sve_frecpx_h,
4477 gen_helper_sve_frecpx_s,
4478 gen_helper_sve_frecpx_d
4479 };
4480 if (a->esz == 0) {
4481 return false;
4482 }
4483 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4484}
4485
3a7be554 4486static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4487{
4488 static gen_helper_gvec_3_ptr * const fns[3] = {
4489 gen_helper_sve_fsqrt_h,
4490 gen_helper_sve_fsqrt_s,
4491 gen_helper_sve_fsqrt_d
4492 };
4493 if (a->esz == 0) {
4494 return false;
4495 }
4496 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4497}
4498
3a7be554 4499static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4500{
4501 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4502}
4503
3a7be554 4504static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4505{
4506 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4507}
4508
3a7be554 4509static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4510{
4511 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4512}
4513
3a7be554 4514static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4515{
4516 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4517}
4518
3a7be554 4519static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4520{
4521 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4522}
4523
3a7be554 4524static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4525{
4526 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4527}
4528
3a7be554 4529static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4530{
4531 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4532}
4533
3a7be554 4534static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4535{
4536 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4537}
4538
3a7be554 4539static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4540{
4541 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4542}
4543
3a7be554 4544static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4545{
4546 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4547}
4548
3a7be554 4549static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4550{
4551 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4552}
4553
3a7be554 4554static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4555{
4556 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4557}
4558
3a7be554 4559static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4560{
4561 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4562}
4563
3a7be554 4564static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4565{
4566 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4567}
4568
d1822297
RH
4569/*
4570 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4571 */
4572
4573/* Subroutine loading a vector register at VOFS of LEN bytes.
4574 * The load should begin at the address Rn + IMM.
4575 */
4576
19f2acc9 4577static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4578{
19f2acc9
RH
4579 int len_align = QEMU_ALIGN_DOWN(len, 8);
4580 int len_remain = len % 8;
4581 int nparts = len / 8 + ctpop8(len_remain);
d1822297 4582 int midx = get_mem_index(s);
b2aa8879 4583 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4584
b2aa8879
RH
4585 dirty_addr = tcg_temp_new_i64();
4586 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4587 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
b2aa8879 4588 tcg_temp_free_i64(dirty_addr);
d1822297 4589
b2aa8879
RH
4590 /*
4591 * Note that unpredicated load/store of vector/predicate registers
d1822297 4592 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4593 * operations on larger quantities.
d1822297
RH
4594 * Attempt to keep code expansion to a minimum by limiting the
4595 * amount of unrolling done.
4596 */
4597 if (nparts <= 4) {
4598 int i;
4599
b2aa8879 4600 t0 = tcg_temp_new_i64();
d1822297 4601 for (i = 0; i < len_align; i += 8) {
b2aa8879 4602 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
d1822297 4603 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 4604 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4605 }
b2aa8879 4606 tcg_temp_free_i64(t0);
d1822297
RH
4607 } else {
4608 TCGLabel *loop = gen_new_label();
4609 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4610
b2aa8879
RH
4611 /* Copy the clean address into a local temp, live across the loop. */
4612 t0 = clean_addr;
4b4dc975 4613 clean_addr = new_tmp_a64_local(s);
b2aa8879 4614 tcg_gen_mov_i64(clean_addr, t0);
d1822297 4615
b2aa8879 4616 gen_set_label(loop);
d1822297 4617
b2aa8879
RH
4618 t0 = tcg_temp_new_i64();
4619 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4620 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4621
b2aa8879 4622 tp = tcg_temp_new_ptr();
d1822297
RH
4623 tcg_gen_add_ptr(tp, cpu_env, i);
4624 tcg_gen_addi_ptr(i, i, 8);
4625 tcg_gen_st_i64(t0, tp, vofs);
4626 tcg_temp_free_ptr(tp);
b2aa8879 4627 tcg_temp_free_i64(t0);
d1822297
RH
4628
4629 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4630 tcg_temp_free_ptr(i);
4631 }
4632
b2aa8879
RH
4633 /*
4634 * Predicate register loads can be any multiple of 2.
d1822297
RH
4635 * Note that we still store the entire 64-bit unit into cpu_env.
4636 */
4637 if (len_remain) {
b2aa8879 4638 t0 = tcg_temp_new_i64();
d1822297
RH
4639 switch (len_remain) {
4640 case 2:
4641 case 4:
4642 case 8:
b2aa8879
RH
4643 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4644 MO_LE | ctz32(len_remain));
d1822297
RH
4645 break;
4646
4647 case 6:
4648 t1 = tcg_temp_new_i64();
b2aa8879
RH
4649 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4650 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4651 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
4652 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4653 tcg_temp_free_i64(t1);
4654 break;
4655
4656 default:
4657 g_assert_not_reached();
4658 }
4659 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 4660 tcg_temp_free_i64(t0);
d1822297 4661 }
d1822297
RH
4662}
4663
5047c204 4664/* Similarly for stores. */
19f2acc9 4665static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 4666{
19f2acc9
RH
4667 int len_align = QEMU_ALIGN_DOWN(len, 8);
4668 int len_remain = len % 8;
4669 int nparts = len / 8 + ctpop8(len_remain);
5047c204 4670 int midx = get_mem_index(s);
bba87d0a 4671 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 4672
bba87d0a
RH
4673 dirty_addr = tcg_temp_new_i64();
4674 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4675 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
bba87d0a 4676 tcg_temp_free_i64(dirty_addr);
5047c204
RH
4677
4678 /* Note that unpredicated load/store of vector/predicate registers
4679 * are defined as a stream of bytes, which equates to little-endian
4680 * operations on larger quantities. There is no nice way to force
4681 * a little-endian store for aarch64_be-linux-user out of line.
4682 *
4683 * Attempt to keep code expansion to a minimum by limiting the
4684 * amount of unrolling done.
4685 */
4686 if (nparts <= 4) {
4687 int i;
4688
bba87d0a 4689 t0 = tcg_temp_new_i64();
5047c204
RH
4690 for (i = 0; i < len_align; i += 8) {
4691 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
bba87d0a 4692 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
d8227b09 4693 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 4694 }
bba87d0a 4695 tcg_temp_free_i64(t0);
5047c204
RH
4696 } else {
4697 TCGLabel *loop = gen_new_label();
bba87d0a 4698 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 4699
bba87d0a
RH
4700 /* Copy the clean address into a local temp, live across the loop. */
4701 t0 = clean_addr;
4b4dc975 4702 clean_addr = new_tmp_a64_local(s);
bba87d0a 4703 tcg_gen_mov_i64(clean_addr, t0);
5047c204 4704
bba87d0a 4705 gen_set_label(loop);
5047c204 4706
bba87d0a
RH
4707 t0 = tcg_temp_new_i64();
4708 tp = tcg_temp_new_ptr();
4709 tcg_gen_add_ptr(tp, cpu_env, i);
4710 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 4711 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
4712 tcg_temp_free_ptr(tp);
4713
4714 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4715 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4716 tcg_temp_free_i64(t0);
5047c204
RH
4717
4718 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4719 tcg_temp_free_ptr(i);
4720 }
4721
4722 /* Predicate register stores can be any multiple of 2. */
4723 if (len_remain) {
bba87d0a 4724 t0 = tcg_temp_new_i64();
5047c204 4725 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
4726
4727 switch (len_remain) {
4728 case 2:
4729 case 4:
4730 case 8:
bba87d0a
RH
4731 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4732 MO_LE | ctz32(len_remain));
5047c204
RH
4733 break;
4734
4735 case 6:
bba87d0a
RH
4736 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4737 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 4738 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 4739 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
4740 break;
4741
4742 default:
4743 g_assert_not_reached();
4744 }
bba87d0a 4745 tcg_temp_free_i64(t0);
5047c204 4746 }
5047c204
RH
4747}
4748
3a7be554 4749static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4750{
4751 if (sve_access_check(s)) {
4752 int size = vec_full_reg_size(s);
4753 int off = vec_full_reg_offset(s, a->rd);
4754 do_ldr(s, off, size, a->rn, a->imm * size);
4755 }
4756 return true;
4757}
4758
3a7be554 4759static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4760{
4761 if (sve_access_check(s)) {
4762 int size = pred_full_reg_size(s);
4763 int off = pred_full_reg_offset(s, a->rd);
4764 do_ldr(s, off, size, a->rn, a->imm * size);
4765 }
4766 return true;
4767}
c4e7c493 4768
3a7be554 4769static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4770{
4771 if (sve_access_check(s)) {
4772 int size = vec_full_reg_size(s);
4773 int off = vec_full_reg_offset(s, a->rd);
4774 do_str(s, off, size, a->rn, a->imm * size);
4775 }
4776 return true;
4777}
4778
3a7be554 4779static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4780{
4781 if (sve_access_check(s)) {
4782 int size = pred_full_reg_size(s);
4783 int off = pred_full_reg_offset(s, a->rd);
4784 do_str(s, off, size, a->rn, a->imm * size);
4785 }
4786 return true;
4787}
4788
c4e7c493
RH
4789/*
4790 *** SVE Memory - Contiguous Load Group
4791 */
4792
4793/* The memory mode of the dtype. */
14776ab5 4794static const MemOp dtype_mop[16] = {
c4e7c493
RH
4795 MO_UB, MO_UB, MO_UB, MO_UB,
4796 MO_SL, MO_UW, MO_UW, MO_UW,
4797 MO_SW, MO_SW, MO_UL, MO_UL,
4798 MO_SB, MO_SB, MO_SB, MO_Q
4799};
4800
4801#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4802
4803/* The vector element size of dtype. */
4804static const uint8_t dtype_esz[16] = {
4805 0, 1, 2, 3,
4806 3, 1, 2, 3,
4807 3, 2, 2, 3,
4808 3, 2, 1, 3
4809};
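/*
 * The 4-bit dtype selects both the memory element size (dtype_mop) and
 * the destination element size (dtype_esz); the signed MemOps mark the
 * sign-extending forms, where the memory element is narrower than the
 * destination element.
 */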
4810
4811static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
4812 int dtype, uint32_t mte_n, bool is_write,
4813 gen_helper_gvec_mem *fn)
c4e7c493
RH
4814{
4815 unsigned vsz = vec_full_reg_size(s);
4816 TCGv_ptr t_pg;
500d0484 4817 TCGv_i32 t_desc;
206adacf 4818 int desc = 0;
c4e7c493 4819
206adacf
RH
4820 /*
4821 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
4822 * registers as pointers, so encode the regno into the data field.
4823 * For consistency, do this even for LD1.
4824 */
9473d0ec 4825 if (s->mte_active[0]) {
206adacf
RH
4826 int msz = dtype_msz(dtype);
4827
4828 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4829 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4830 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4831 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 4832 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
206adacf 4833 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
4834 } else {
4835 addr = clean_data_tbi(s, addr);
206adacf 4836 }
9473d0ec 4837
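    /*
     * The low bits of desc are a normal simd_desc with the register
     * number zt or'd into the data field; any MTE fields sit above
     * SVE_MTEDESC_SHIFT.
     */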
206adacf 4838 desc = simd_desc(vsz, vsz, zt | desc);
500d0484 4839 t_desc = tcg_const_i32(desc);
c4e7c493
RH
4840 t_pg = tcg_temp_new_ptr();
4841
4842 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
500d0484 4843 fn(cpu_env, t_pg, addr, t_desc);
c4e7c493
RH
4844
4845 tcg_temp_free_ptr(t_pg);
500d0484 4846 tcg_temp_free_i32(t_desc);
c4e7c493
RH
4847}
4848
4849static void do_ld_zpa(DisasContext *s, int zt, int pg,
4850 TCGv_i64 addr, int dtype, int nreg)
4851{
206adacf
RH
4852 static gen_helper_gvec_mem * const fns[2][2][16][4] = {
4853 { /* mte inactive, little-endian */
4854 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
7d0a57a2 4855 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
206adacf
RH
4856 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4857 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4858 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4859
4860 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4861 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4862 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4863 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4864 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4865
4866 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4867 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4868 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4869 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4870 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4871
4872 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4873 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4874 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4875 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4876 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4877
4878 /* mte inactive, big-endian */
4879 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4880 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4881 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4882 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4883 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4884
4885 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4886 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4887 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4888 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4889 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4890
4891 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4892 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4893 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4894 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4895 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4896
4897 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4898 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4899 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4900 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4901 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4902
4903 { /* mte active, little-endian */
4904 { { gen_helper_sve_ld1bb_r_mte,
4905 gen_helper_sve_ld2bb_r_mte,
4906 gen_helper_sve_ld3bb_r_mte,
4907 gen_helper_sve_ld4bb_r_mte },
4908 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4909 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4910 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4911
4912 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4913 { gen_helper_sve_ld1hh_le_r_mte,
4914 gen_helper_sve_ld2hh_le_r_mte,
4915 gen_helper_sve_ld3hh_le_r_mte,
4916 gen_helper_sve_ld4hh_le_r_mte },
4917 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4918 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4919
4920 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4921 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4922 { gen_helper_sve_ld1ss_le_r_mte,
4923 gen_helper_sve_ld2ss_le_r_mte,
4924 gen_helper_sve_ld3ss_le_r_mte,
4925 gen_helper_sve_ld4ss_le_r_mte },
4926 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4927
4928 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4929 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4930 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4931 { gen_helper_sve_ld1dd_le_r_mte,
4932 gen_helper_sve_ld2dd_le_r_mte,
4933 gen_helper_sve_ld3dd_le_r_mte,
4934 gen_helper_sve_ld4dd_le_r_mte } },
4935
4936 /* mte active, big-endian */
4937 { { gen_helper_sve_ld1bb_r_mte,
4938 gen_helper_sve_ld2bb_r_mte,
4939 gen_helper_sve_ld3bb_r_mte,
4940 gen_helper_sve_ld4bb_r_mte },
4941 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4942 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4943 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4944
4945 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4946 { gen_helper_sve_ld1hh_be_r_mte,
4947 gen_helper_sve_ld2hh_be_r_mte,
4948 gen_helper_sve_ld3hh_be_r_mte,
4949 gen_helper_sve_ld4hh_be_r_mte },
4950 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4951 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4952
4953 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4954 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4955 { gen_helper_sve_ld1ss_be_r_mte,
4956 gen_helper_sve_ld2ss_be_r_mte,
4957 gen_helper_sve_ld3ss_be_r_mte,
4958 gen_helper_sve_ld4ss_be_r_mte },
4959 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4960
4961 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4962 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4963 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4964 { gen_helper_sve_ld1dd_be_r_mte,
4965 gen_helper_sve_ld2dd_be_r_mte,
4966 gen_helper_sve_ld3dd_be_r_mte,
4967 gen_helper_sve_ld4dd_be_r_mte } } },
c4e7c493 4968 };
206adacf
RH
4969 gen_helper_gvec_mem *fn
4970 = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 4971
206adacf
RH
4972 /*
4973 * While there are holes in the table, they are not
c4e7c493
RH
4974 * accessible via the instruction encoding.
4975 */
4976 assert(fn != NULL);
206adacf 4977 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
4978}
4979
3a7be554 4980static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
4981{
4982 if (a->rm == 31) {
4983 return false;
4984 }
4985 if (sve_access_check(s)) {
4986 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4987 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4988 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4989 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4990 }
4991 return true;
4992}
4993
3a7be554 4994static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
4995{
4996 if (sve_access_check(s)) {
4997 int vsz = vec_full_reg_size(s);
4998 int elements = vsz >> dtype_esz[a->dtype];
4999 TCGv_i64 addr = new_tmp_a64(s);
5000
5001 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5002 (a->imm * elements * (a->nreg + 1))
5003 << dtype_msz(a->dtype));
5004 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5005 }
5006 return true;
5007}
e2654d75 5008
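/*
 * First-fault (LDFF1) and non-fault (LDNF1) loads use dedicated helpers:
 * LDFF1 may fault only on the first active element, and LDNF1 never takes
 * a fault.  When an access is suppressed, the corresponding FFR bits are
 * cleared so software can detect how far the load progressed.
 */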
3a7be554 5009static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 5010{
aa13f7c3
RH
5011 static gen_helper_gvec_mem * const fns[2][2][16] = {
5012 { /* mte inactive, little-endian */
5013 { gen_helper_sve_ldff1bb_r,
5014 gen_helper_sve_ldff1bhu_r,
5015 gen_helper_sve_ldff1bsu_r,
5016 gen_helper_sve_ldff1bdu_r,
5017
5018 gen_helper_sve_ldff1sds_le_r,
5019 gen_helper_sve_ldff1hh_le_r,
5020 gen_helper_sve_ldff1hsu_le_r,
5021 gen_helper_sve_ldff1hdu_le_r,
5022
5023 gen_helper_sve_ldff1hds_le_r,
5024 gen_helper_sve_ldff1hss_le_r,
5025 gen_helper_sve_ldff1ss_le_r,
5026 gen_helper_sve_ldff1sdu_le_r,
5027
5028 gen_helper_sve_ldff1bds_r,
5029 gen_helper_sve_ldff1bss_r,
5030 gen_helper_sve_ldff1bhs_r,
5031 gen_helper_sve_ldff1dd_le_r },
5032
5033 /* mte inactive, big-endian */
5034 { gen_helper_sve_ldff1bb_r,
5035 gen_helper_sve_ldff1bhu_r,
5036 gen_helper_sve_ldff1bsu_r,
5037 gen_helper_sve_ldff1bdu_r,
5038
5039 gen_helper_sve_ldff1sds_be_r,
5040 gen_helper_sve_ldff1hh_be_r,
5041 gen_helper_sve_ldff1hsu_be_r,
5042 gen_helper_sve_ldff1hdu_be_r,
5043
5044 gen_helper_sve_ldff1hds_be_r,
5045 gen_helper_sve_ldff1hss_be_r,
5046 gen_helper_sve_ldff1ss_be_r,
5047 gen_helper_sve_ldff1sdu_be_r,
5048
5049 gen_helper_sve_ldff1bds_r,
5050 gen_helper_sve_ldff1bss_r,
5051 gen_helper_sve_ldff1bhs_r,
5052 gen_helper_sve_ldff1dd_be_r } },
5053
5054 { /* mte active, little-endian */
5055 { gen_helper_sve_ldff1bb_r_mte,
5056 gen_helper_sve_ldff1bhu_r_mte,
5057 gen_helper_sve_ldff1bsu_r_mte,
5058 gen_helper_sve_ldff1bdu_r_mte,
5059
5060 gen_helper_sve_ldff1sds_le_r_mte,
5061 gen_helper_sve_ldff1hh_le_r_mte,
5062 gen_helper_sve_ldff1hsu_le_r_mte,
5063 gen_helper_sve_ldff1hdu_le_r_mte,
5064
5065 gen_helper_sve_ldff1hds_le_r_mte,
5066 gen_helper_sve_ldff1hss_le_r_mte,
5067 gen_helper_sve_ldff1ss_le_r_mte,
5068 gen_helper_sve_ldff1sdu_le_r_mte,
5069
5070 gen_helper_sve_ldff1bds_r_mte,
5071 gen_helper_sve_ldff1bss_r_mte,
5072 gen_helper_sve_ldff1bhs_r_mte,
5073 gen_helper_sve_ldff1dd_le_r_mte },
5074
5075 /* mte active, big-endian */
5076 { gen_helper_sve_ldff1bb_r_mte,
5077 gen_helper_sve_ldff1bhu_r_mte,
5078 gen_helper_sve_ldff1bsu_r_mte,
5079 gen_helper_sve_ldff1bdu_r_mte,
5080
5081 gen_helper_sve_ldff1sds_be_r_mte,
5082 gen_helper_sve_ldff1hh_be_r_mte,
5083 gen_helper_sve_ldff1hsu_be_r_mte,
5084 gen_helper_sve_ldff1hdu_be_r_mte,
5085
5086 gen_helper_sve_ldff1hds_be_r_mte,
5087 gen_helper_sve_ldff1hss_be_r_mte,
5088 gen_helper_sve_ldff1ss_be_r_mte,
5089 gen_helper_sve_ldff1sdu_be_r_mte,
5090
5091 gen_helper_sve_ldff1bds_r_mte,
5092 gen_helper_sve_ldff1bss_r_mte,
5093 gen_helper_sve_ldff1bhs_r_mte,
5094 gen_helper_sve_ldff1dd_be_r_mte } },
e2654d75
RH
5095 };
5096
5097 if (sve_access_check(s)) {
5098 TCGv_i64 addr = new_tmp_a64(s);
5099 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5100 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
aa13f7c3
RH
5101 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5102 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
5103 }
5104 return true;
5105}
5106
3a7be554 5107static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 5108{
aa13f7c3
RH
5109 static gen_helper_gvec_mem * const fns[2][2][16] = {
5110 { /* mte inactive, little-endian */
5111 { gen_helper_sve_ldnf1bb_r,
5112 gen_helper_sve_ldnf1bhu_r,
5113 gen_helper_sve_ldnf1bsu_r,
5114 gen_helper_sve_ldnf1bdu_r,
5115
5116 gen_helper_sve_ldnf1sds_le_r,
5117 gen_helper_sve_ldnf1hh_le_r,
5118 gen_helper_sve_ldnf1hsu_le_r,
5119 gen_helper_sve_ldnf1hdu_le_r,
5120
5121 gen_helper_sve_ldnf1hds_le_r,
5122 gen_helper_sve_ldnf1hss_le_r,
5123 gen_helper_sve_ldnf1ss_le_r,
5124 gen_helper_sve_ldnf1sdu_le_r,
5125
5126 gen_helper_sve_ldnf1bds_r,
5127 gen_helper_sve_ldnf1bss_r,
5128 gen_helper_sve_ldnf1bhs_r,
5129 gen_helper_sve_ldnf1dd_le_r },
5130
5131 /* mte inactive, big-endian */
5132 { gen_helper_sve_ldnf1bb_r,
5133 gen_helper_sve_ldnf1bhu_r,
5134 gen_helper_sve_ldnf1bsu_r,
5135 gen_helper_sve_ldnf1bdu_r,
5136
5137 gen_helper_sve_ldnf1sds_be_r,
5138 gen_helper_sve_ldnf1hh_be_r,
5139 gen_helper_sve_ldnf1hsu_be_r,
5140 gen_helper_sve_ldnf1hdu_be_r,
5141
5142 gen_helper_sve_ldnf1hds_be_r,
5143 gen_helper_sve_ldnf1hss_be_r,
5144 gen_helper_sve_ldnf1ss_be_r,
5145 gen_helper_sve_ldnf1sdu_be_r,
5146
5147 gen_helper_sve_ldnf1bds_r,
5148 gen_helper_sve_ldnf1bss_r,
5149 gen_helper_sve_ldnf1bhs_r,
5150 gen_helper_sve_ldnf1dd_be_r } },
5151
5152 { /* mte active, little-endian */
5153 { gen_helper_sve_ldnf1bb_r_mte,
5154 gen_helper_sve_ldnf1bhu_r_mte,
5155 gen_helper_sve_ldnf1bsu_r_mte,
5156 gen_helper_sve_ldnf1bdu_r_mte,
5157
5158 gen_helper_sve_ldnf1sds_le_r_mte,
5159 gen_helper_sve_ldnf1hh_le_r_mte,
5160 gen_helper_sve_ldnf1hsu_le_r_mte,
5161 gen_helper_sve_ldnf1hdu_le_r_mte,
5162
5163 gen_helper_sve_ldnf1hds_le_r_mte,
5164 gen_helper_sve_ldnf1hss_le_r_mte,
5165 gen_helper_sve_ldnf1ss_le_r_mte,
5166 gen_helper_sve_ldnf1sdu_le_r_mte,
5167
5168 gen_helper_sve_ldnf1bds_r_mte,
5169 gen_helper_sve_ldnf1bss_r_mte,
5170 gen_helper_sve_ldnf1bhs_r_mte,
5171 gen_helper_sve_ldnf1dd_le_r_mte },
5172
5173 /* mte active, big-endian */
5174 { gen_helper_sve_ldnf1bb_r_mte,
5175 gen_helper_sve_ldnf1bhu_r_mte,
5176 gen_helper_sve_ldnf1bsu_r_mte,
5177 gen_helper_sve_ldnf1bdu_r_mte,
5178
5179 gen_helper_sve_ldnf1sds_be_r_mte,
5180 gen_helper_sve_ldnf1hh_be_r_mte,
5181 gen_helper_sve_ldnf1hsu_be_r_mte,
5182 gen_helper_sve_ldnf1hdu_be_r_mte,
5183
5184 gen_helper_sve_ldnf1hds_be_r_mte,
5185 gen_helper_sve_ldnf1hss_be_r_mte,
5186 gen_helper_sve_ldnf1ss_be_r_mte,
5187 gen_helper_sve_ldnf1sdu_be_r_mte,
5188
5189 gen_helper_sve_ldnf1bds_r_mte,
5190 gen_helper_sve_ldnf1bss_r_mte,
5191 gen_helper_sve_ldnf1bhs_r_mte,
5192 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
5193 };
5194
5195 if (sve_access_check(s)) {
5196 int vsz = vec_full_reg_size(s);
5197 int elements = vsz >> dtype_esz[a->dtype];
5198 int off = (a->imm * elements) << dtype_msz(a->dtype);
5199 TCGv_i64 addr = new_tmp_a64(s);
5200
5201 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
5202 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
5203 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
5204 }
5205 return true;
5206}
1a039c7e 5207
05abe304
RH
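/*
 * Load and replicate a 128-bit quadword: the load itself uses the normal
 * predicated LD1 helpers with a fixed 16-byte operation size, governed by
 * the first 16 bits of the predicate, and the result is then broadcast
 * across the rest of the vector.
 */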
5208static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
5209{
7d0a57a2
RH
5210 static gen_helper_gvec_mem * const fns[2][4] = {
5211 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
5212 gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
5213 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
5214 gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
05abe304
RH
5215 };
5216 unsigned vsz = vec_full_reg_size(s);
5217 TCGv_ptr t_pg;
500d0484
RH
5218 TCGv_i32 t_desc;
5219 int desc, poff;
05abe304
RH
5220
5221 /* Load the first quadword using the normal predicated load helpers. */
ba080b86 5222 desc = simd_desc(16, 16, zt);
500d0484 5223 t_desc = tcg_const_i32(desc);
2a99ab2b
RH
5224
5225 poff = pred_full_reg_offset(s, pg);
5226 if (vsz > 16) {
5227 /*
5228 * Zero-extend the first 16 bits of the predicate into a temporary.
5229 * This avoids triggering an assert making sure we don't have bits
5230 * set within a predicate beyond VQ, but we have lowered VQ to 1
5231 * for this load operation.
5232 */
5233 TCGv_i64 tmp = tcg_temp_new_i64();
5234#ifdef HOST_WORDS_BIGENDIAN
5235 poff += 6;
5236#endif
5237 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5238
5239 poff = offsetof(CPUARMState, vfp.preg_tmp);
5240 tcg_gen_st_i64(tmp, cpu_env, poff);
5241 tcg_temp_free_i64(tmp);
5242 }
5243
05abe304 5244 t_pg = tcg_temp_new_ptr();
2a99ab2b 5245 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 5246
500d0484 5247 fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
05abe304
RH
5248
5249 tcg_temp_free_ptr(t_pg);
500d0484 5250 tcg_temp_free_i32(t_desc);
05abe304
RH
5251
5252 /* Replicate that first quadword. */
5253 if (vsz > 16) {
5254 unsigned dofs = vec_full_reg_offset(s, zt);
5255 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
5256 }
5257}
5258
3a7be554 5259static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5260{
5261 if (a->rm == 31) {
5262 return false;
5263 }
5264 if (sve_access_check(s)) {
5265 int msz = dtype_msz(a->dtype);
5266 TCGv_i64 addr = new_tmp_a64(s);
5267 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5268 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5269 do_ldrq(s, a->rd, a->pg, addr, msz);
5270 }
5271 return true;
5272}
5273
3a7be554 5274static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5275{
5276 if (sve_access_check(s)) {
5277 TCGv_i64 addr = new_tmp_a64(s);
5278 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
5279 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
5280 }
5281 return true;
5282}
5283
68459864 5284/* Load and broadcast element. */
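/*
 * If no predicate element is active, the load is skipped entirely
 * (branch to OVER); the final do_movz_zpz zeroes all inactive elements,
 * which in that case is the whole register.
 */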
3a7be554 5285static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 5286{
68459864
RH
5287 unsigned vsz = vec_full_reg_size(s);
5288 unsigned psz = pred_full_reg_size(s);
5289 unsigned esz = dtype_esz[a->dtype];
d0e372b0 5290 unsigned msz = dtype_msz(a->dtype);
c0ed9166 5291 TCGLabel *over;
4ac430e1 5292 TCGv_i64 temp, clean_addr;
68459864 5293
c0ed9166
RH
5294 if (!sve_access_check(s)) {
5295 return true;
5296 }
5297
5298 over = gen_new_label();
5299
68459864
RH
5300 /* If the guarding predicate has no bits set, no load occurs. */
5301 if (psz <= 8) {
5302 /* Reduce the pred_esz_masks value simply to reduce the
5303 * size of the code generated here.
5304 */
5305 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5306 temp = tcg_temp_new_i64();
5307 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5308 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5309 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5310 tcg_temp_free_i64(temp);
5311 } else {
5312 TCGv_i32 t32 = tcg_temp_new_i32();
5313 find_last_active(s, t32, esz, a->pg);
5314 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5315 tcg_temp_free_i32(t32);
5316 }
5317
5318 /* Load the data. */
5319 temp = tcg_temp_new_i64();
d0e372b0 5320 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5321 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5322
5323 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
0ca0f872 5324 finalize_memop(s, dtype_mop[a->dtype]));
68459864
RH
5325
5326 /* Broadcast to *all* elements. */
5327 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5328 vsz, vsz, temp);
5329 tcg_temp_free_i64(temp);
5330
5331 /* Zero the inactive elements. */
5332 gen_set_label(over);
60245996 5333 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5334}
5335
1a039c7e
RH
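/*
 * Expand a predicated contiguous store.  ST1 indexes the single-register
 * table by [msz][esz], which includes the truncating forms (e.g. ST1B of
 * .h/.s/.d elements); ST2-ST4 require msz == esz and index by register
 * count instead.
 */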
5336static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5337 int msz, int esz, int nreg)
5338{
71b9f394
RH
5339 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5340 { { { gen_helper_sve_st1bb_r,
5341 gen_helper_sve_st1bh_r,
5342 gen_helper_sve_st1bs_r,
5343 gen_helper_sve_st1bd_r },
5344 { NULL,
5345 gen_helper_sve_st1hh_le_r,
5346 gen_helper_sve_st1hs_le_r,
5347 gen_helper_sve_st1hd_le_r },
5348 { NULL, NULL,
5349 gen_helper_sve_st1ss_le_r,
5350 gen_helper_sve_st1sd_le_r },
5351 { NULL, NULL, NULL,
5352 gen_helper_sve_st1dd_le_r } },
5353 { { gen_helper_sve_st1bb_r,
5354 gen_helper_sve_st1bh_r,
5355 gen_helper_sve_st1bs_r,
5356 gen_helper_sve_st1bd_r },
5357 { NULL,
5358 gen_helper_sve_st1hh_be_r,
5359 gen_helper_sve_st1hs_be_r,
5360 gen_helper_sve_st1hd_be_r },
5361 { NULL, NULL,
5362 gen_helper_sve_st1ss_be_r,
5363 gen_helper_sve_st1sd_be_r },
5364 { NULL, NULL, NULL,
5365 gen_helper_sve_st1dd_be_r } } },
5366
5367 { { { gen_helper_sve_st1bb_r_mte,
5368 gen_helper_sve_st1bh_r_mte,
5369 gen_helper_sve_st1bs_r_mte,
5370 gen_helper_sve_st1bd_r_mte },
5371 { NULL,
5372 gen_helper_sve_st1hh_le_r_mte,
5373 gen_helper_sve_st1hs_le_r_mte,
5374 gen_helper_sve_st1hd_le_r_mte },
5375 { NULL, NULL,
5376 gen_helper_sve_st1ss_le_r_mte,
5377 gen_helper_sve_st1sd_le_r_mte },
5378 { NULL, NULL, NULL,
5379 gen_helper_sve_st1dd_le_r_mte } },
5380 { { gen_helper_sve_st1bb_r_mte,
5381 gen_helper_sve_st1bh_r_mte,
5382 gen_helper_sve_st1bs_r_mte,
5383 gen_helper_sve_st1bd_r_mte },
5384 { NULL,
5385 gen_helper_sve_st1hh_be_r_mte,
5386 gen_helper_sve_st1hs_be_r_mte,
5387 gen_helper_sve_st1hd_be_r_mte },
5388 { NULL, NULL,
5389 gen_helper_sve_st1ss_be_r_mte,
5390 gen_helper_sve_st1sd_be_r_mte },
5391 { NULL, NULL, NULL,
5392 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5393 };
71b9f394
RH
5394 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5395 { { { gen_helper_sve_st2bb_r,
5396 gen_helper_sve_st2hh_le_r,
5397 gen_helper_sve_st2ss_le_r,
5398 gen_helper_sve_st2dd_le_r },
5399 { gen_helper_sve_st3bb_r,
5400 gen_helper_sve_st3hh_le_r,
5401 gen_helper_sve_st3ss_le_r,
5402 gen_helper_sve_st3dd_le_r },
5403 { gen_helper_sve_st4bb_r,
5404 gen_helper_sve_st4hh_le_r,
5405 gen_helper_sve_st4ss_le_r,
5406 gen_helper_sve_st4dd_le_r } },
5407 { { gen_helper_sve_st2bb_r,
5408 gen_helper_sve_st2hh_be_r,
5409 gen_helper_sve_st2ss_be_r,
5410 gen_helper_sve_st2dd_be_r },
5411 { gen_helper_sve_st3bb_r,
5412 gen_helper_sve_st3hh_be_r,
5413 gen_helper_sve_st3ss_be_r,
5414 gen_helper_sve_st3dd_be_r },
5415 { gen_helper_sve_st4bb_r,
5416 gen_helper_sve_st4hh_be_r,
5417 gen_helper_sve_st4ss_be_r,
5418 gen_helper_sve_st4dd_be_r } } },
5419 { { { gen_helper_sve_st2bb_r_mte,
5420 gen_helper_sve_st2hh_le_r_mte,
5421 gen_helper_sve_st2ss_le_r_mte,
5422 gen_helper_sve_st2dd_le_r_mte },
5423 { gen_helper_sve_st3bb_r_mte,
5424 gen_helper_sve_st3hh_le_r_mte,
5425 gen_helper_sve_st3ss_le_r_mte,
5426 gen_helper_sve_st3dd_le_r_mte },
5427 { gen_helper_sve_st4bb_r_mte,
5428 gen_helper_sve_st4hh_le_r_mte,
5429 gen_helper_sve_st4ss_le_r_mte,
5430 gen_helper_sve_st4dd_le_r_mte } },
5431 { { gen_helper_sve_st2bb_r_mte,
5432 gen_helper_sve_st2hh_be_r_mte,
5433 gen_helper_sve_st2ss_be_r_mte,
5434 gen_helper_sve_st2dd_be_r_mte },
5435 { gen_helper_sve_st3bb_r_mte,
5436 gen_helper_sve_st3hh_be_r_mte,
5437 gen_helper_sve_st3ss_be_r_mte,
5438 gen_helper_sve_st3dd_be_r_mte },
5439 { gen_helper_sve_st4bb_r_mte,
5440 gen_helper_sve_st4hh_be_r_mte,
5441 gen_helper_sve_st4ss_be_r_mte,
5442 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5443 };
5444 gen_helper_gvec_mem *fn;
28d57f2d 5445 int be = s->be_data == MO_BE;
1a039c7e
RH
5446
5447 if (nreg == 0) {
5448 /* ST1 */
71b9f394
RH
5449 fn = fn_single[s->mte_active[0]][be][msz][esz];
5450 nreg = 1;
1a039c7e
RH
5451 } else {
5452 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5453 assert(msz == esz);
71b9f394 5454 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5455 }
5456 assert(fn != NULL);
71b9f394 5457 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5458}
5459
3a7be554 5460static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5461{
5462 if (a->rm == 31 || a->msz > a->esz) {
5463 return false;
5464 }
5465 if (sve_access_check(s)) {
5466 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5467 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5468 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5469 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5470 }
5471 return true;
5472}
5473
3a7be554 5474static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5475{
5476 if (a->msz > a->esz) {
5477 return false;
5478 }
5479 if (sve_access_check(s)) {
5480 int vsz = vec_full_reg_size(s);
5481 int elements = vsz >> a->esz;
5482 TCGv_i64 addr = new_tmp_a64(s);
5483
5484 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5485 (a->imm * elements * (a->nreg + 1)) << a->msz);
5486 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5487 }
5488 return true;
5489}
f6dbf62a
RH
5490
5491/*
5492 *** SVE gather loads / scatter stores
5493 */
5494
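/*
 * Common expansion for gather loads and scatter stores: the descriptor's
 * data field carries the offset scale (the shift applied to each vector
 * offset, zero when unscaled), with any MTE fields above
 * SVE_MTEDESC_SHIFT; the helper receives the data vector, the predicate,
 * the vector of offsets and the scalar base.
 */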
500d0484 5495static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5496 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5497 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5498{
5499 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5500 TCGv_ptr t_zm = tcg_temp_new_ptr();
5501 TCGv_ptr t_pg = tcg_temp_new_ptr();
5502 TCGv_ptr t_zt = tcg_temp_new_ptr();
500d0484 5503 TCGv_i32 t_desc;
d28d12f0 5504 int desc = 0;
500d0484 5505
d28d12f0
RH
5506 if (s->mte_active[0]) {
5507 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5508 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5509 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5510 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5511 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
5512 desc <<= SVE_MTEDESC_SHIFT;
5513 }
cdecb3fc 5514 desc = simd_desc(vsz, vsz, desc | scale);
500d0484 5515 t_desc = tcg_const_i32(desc);
f6dbf62a
RH
5516
5517 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5518 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5519 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
500d0484 5520 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
f6dbf62a
RH
5521
5522 tcg_temp_free_ptr(t_zt);
5523 tcg_temp_free_ptr(t_zm);
5524 tcg_temp_free_ptr(t_pg);
500d0484 5525 tcg_temp_free_i32(t_desc);
f6dbf62a
RH
5526}
5527
d28d12f0
RH
5528/* Indexed by [mte][be][ff][xs][u][msz]. */
5529static gen_helper_gvec_mem_scatter * const
5530gather_load_fn32[2][2][2][2][2][3] = {
5531 { /* MTE Inactive */
5532 { /* Little-endian */
5533 { { { gen_helper_sve_ldbss_zsu,
5534 gen_helper_sve_ldhss_le_zsu,
5535 NULL, },
5536 { gen_helper_sve_ldbsu_zsu,
5537 gen_helper_sve_ldhsu_le_zsu,
5538 gen_helper_sve_ldss_le_zsu, } },
5539 { { gen_helper_sve_ldbss_zss,
5540 gen_helper_sve_ldhss_le_zss,
5541 NULL, },
5542 { gen_helper_sve_ldbsu_zss,
5543 gen_helper_sve_ldhsu_le_zss,
5544 gen_helper_sve_ldss_le_zss, } } },
5545
5546 /* First-fault */
5547 { { { gen_helper_sve_ldffbss_zsu,
5548 gen_helper_sve_ldffhss_le_zsu,
5549 NULL, },
5550 { gen_helper_sve_ldffbsu_zsu,
5551 gen_helper_sve_ldffhsu_le_zsu,
5552 gen_helper_sve_ldffss_le_zsu, } },
5553 { { gen_helper_sve_ldffbss_zss,
5554 gen_helper_sve_ldffhss_le_zss,
5555 NULL, },
5556 { gen_helper_sve_ldffbsu_zss,
5557 gen_helper_sve_ldffhsu_le_zss,
5558 gen_helper_sve_ldffss_le_zss, } } } },
5559
5560 { /* Big-endian */
5561 { { { gen_helper_sve_ldbss_zsu,
5562 gen_helper_sve_ldhss_be_zsu,
5563 NULL, },
5564 { gen_helper_sve_ldbsu_zsu,
5565 gen_helper_sve_ldhsu_be_zsu,
5566 gen_helper_sve_ldss_be_zsu, } },
5567 { { gen_helper_sve_ldbss_zss,
5568 gen_helper_sve_ldhss_be_zss,
5569 NULL, },
5570 { gen_helper_sve_ldbsu_zss,
5571 gen_helper_sve_ldhsu_be_zss,
5572 gen_helper_sve_ldss_be_zss, } } },
5573
5574 /* First-fault */
5575 { { { gen_helper_sve_ldffbss_zsu,
5576 gen_helper_sve_ldffhss_be_zsu,
5577 NULL, },
5578 { gen_helper_sve_ldffbsu_zsu,
5579 gen_helper_sve_ldffhsu_be_zsu,
5580 gen_helper_sve_ldffss_be_zsu, } },
5581 { { gen_helper_sve_ldffbss_zss,
5582 gen_helper_sve_ldffhss_be_zss,
5583 NULL, },
5584 { gen_helper_sve_ldffbsu_zss,
5585 gen_helper_sve_ldffhsu_be_zss,
5586 gen_helper_sve_ldffss_be_zss, } } } } },
5587 { /* MTE Active */
5588 { /* Little-endian */
5589 { { { gen_helper_sve_ldbss_zsu_mte,
5590 gen_helper_sve_ldhss_le_zsu_mte,
5591 NULL, },
5592 { gen_helper_sve_ldbsu_zsu_mte,
5593 gen_helper_sve_ldhsu_le_zsu_mte,
5594 gen_helper_sve_ldss_le_zsu_mte, } },
5595 { { gen_helper_sve_ldbss_zss_mte,
5596 gen_helper_sve_ldhss_le_zss_mte,
5597 NULL, },
5598 { gen_helper_sve_ldbsu_zss_mte,
5599 gen_helper_sve_ldhsu_le_zss_mte,
5600 gen_helper_sve_ldss_le_zss_mte, } } },
5601
5602 /* First-fault */
5603 { { { gen_helper_sve_ldffbss_zsu_mte,
5604 gen_helper_sve_ldffhss_le_zsu_mte,
5605 NULL, },
5606 { gen_helper_sve_ldffbsu_zsu_mte,
5607 gen_helper_sve_ldffhsu_le_zsu_mte,
5608 gen_helper_sve_ldffss_le_zsu_mte, } },
5609 { { gen_helper_sve_ldffbss_zss_mte,
5610 gen_helper_sve_ldffhss_le_zss_mte,
5611 NULL, },
5612 { gen_helper_sve_ldffbsu_zss_mte,
5613 gen_helper_sve_ldffhsu_le_zss_mte,
5614 gen_helper_sve_ldffss_le_zss_mte, } } } },
5615
5616 { /* Big-endian */
5617 { { { gen_helper_sve_ldbss_zsu_mte,
5618 gen_helper_sve_ldhss_be_zsu_mte,
5619 NULL, },
5620 { gen_helper_sve_ldbsu_zsu_mte,
5621 gen_helper_sve_ldhsu_be_zsu_mte,
5622 gen_helper_sve_ldss_be_zsu_mte, } },
5623 { { gen_helper_sve_ldbss_zss_mte,
5624 gen_helper_sve_ldhss_be_zss_mte,
5625 NULL, },
5626 { gen_helper_sve_ldbsu_zss_mte,
5627 gen_helper_sve_ldhsu_be_zss_mte,
5628 gen_helper_sve_ldss_be_zss_mte, } } },
5629
5630 /* First-fault */
5631 { { { gen_helper_sve_ldffbss_zsu_mte,
5632 gen_helper_sve_ldffhss_be_zsu_mte,
5633 NULL, },
5634 { gen_helper_sve_ldffbsu_zsu_mte,
5635 gen_helper_sve_ldffhsu_be_zsu_mte,
5636 gen_helper_sve_ldffss_be_zsu_mte, } },
5637 { { gen_helper_sve_ldffbss_zss_mte,
5638 gen_helper_sve_ldffhss_be_zss_mte,
5639 NULL, },
5640 { gen_helper_sve_ldffbsu_zss_mte,
5641 gen_helper_sve_ldffhsu_be_zss_mte,
5642 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5643};
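/*
 * For example, with MTE inactive a little-endian, non-first-fault
 * LD1H gather using unsigned 32-bit offsets and zero-extending loads
 * indexes [0][0][0][0][1][MO_16] and lands on
 * gen_helper_sve_ldhsu_le_zsu.
 */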
5644
5645/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5646static gen_helper_gvec_mem_scatter * const
5647gather_load_fn64[2][2][2][3][2][4] = {
5648 { /* MTE Inactive */
5649 { /* Little-endian */
5650 { { { gen_helper_sve_ldbds_zsu,
5651 gen_helper_sve_ldhds_le_zsu,
5652 gen_helper_sve_ldsds_le_zsu,
5653 NULL, },
5654 { gen_helper_sve_ldbdu_zsu,
5655 gen_helper_sve_ldhdu_le_zsu,
5656 gen_helper_sve_ldsdu_le_zsu,
5657 gen_helper_sve_lddd_le_zsu, } },
5658 { { gen_helper_sve_ldbds_zss,
5659 gen_helper_sve_ldhds_le_zss,
5660 gen_helper_sve_ldsds_le_zss,
5661 NULL, },
5662 { gen_helper_sve_ldbdu_zss,
5663 gen_helper_sve_ldhdu_le_zss,
5664 gen_helper_sve_ldsdu_le_zss,
5665 gen_helper_sve_lddd_le_zss, } },
5666 { { gen_helper_sve_ldbds_zd,
5667 gen_helper_sve_ldhds_le_zd,
5668 gen_helper_sve_ldsds_le_zd,
5669 NULL, },
5670 { gen_helper_sve_ldbdu_zd,
5671 gen_helper_sve_ldhdu_le_zd,
5672 gen_helper_sve_ldsdu_le_zd,
5673 gen_helper_sve_lddd_le_zd, } } },
5674
5675 /* First-fault */
5676 { { { gen_helper_sve_ldffbds_zsu,
5677 gen_helper_sve_ldffhds_le_zsu,
5678 gen_helper_sve_ldffsds_le_zsu,
5679 NULL, },
5680 { gen_helper_sve_ldffbdu_zsu,
5681 gen_helper_sve_ldffhdu_le_zsu,
5682 gen_helper_sve_ldffsdu_le_zsu,
5683 gen_helper_sve_ldffdd_le_zsu, } },
5684 { { gen_helper_sve_ldffbds_zss,
5685 gen_helper_sve_ldffhds_le_zss,
5686 gen_helper_sve_ldffsds_le_zss,
5687 NULL, },
5688 { gen_helper_sve_ldffbdu_zss,
5689 gen_helper_sve_ldffhdu_le_zss,
5690 gen_helper_sve_ldffsdu_le_zss,
5691 gen_helper_sve_ldffdd_le_zss, } },
5692 { { gen_helper_sve_ldffbds_zd,
5693 gen_helper_sve_ldffhds_le_zd,
5694 gen_helper_sve_ldffsds_le_zd,
5695 NULL, },
5696 { gen_helper_sve_ldffbdu_zd,
5697 gen_helper_sve_ldffhdu_le_zd,
5698 gen_helper_sve_ldffsdu_le_zd,
5699 gen_helper_sve_ldffdd_le_zd, } } } },
5700 { /* Big-endian */
5701 { { { gen_helper_sve_ldbds_zsu,
5702 gen_helper_sve_ldhds_be_zsu,
5703 gen_helper_sve_ldsds_be_zsu,
5704 NULL, },
5705 { gen_helper_sve_ldbdu_zsu,
5706 gen_helper_sve_ldhdu_be_zsu,
5707 gen_helper_sve_ldsdu_be_zsu,
5708 gen_helper_sve_lddd_be_zsu, } },
5709 { { gen_helper_sve_ldbds_zss,
5710 gen_helper_sve_ldhds_be_zss,
5711 gen_helper_sve_ldsds_be_zss,
5712 NULL, },
5713 { gen_helper_sve_ldbdu_zss,
5714 gen_helper_sve_ldhdu_be_zss,
5715 gen_helper_sve_ldsdu_be_zss,
5716 gen_helper_sve_lddd_be_zss, } },
5717 { { gen_helper_sve_ldbds_zd,
5718 gen_helper_sve_ldhds_be_zd,
5719 gen_helper_sve_ldsds_be_zd,
5720 NULL, },
5721 { gen_helper_sve_ldbdu_zd,
5722 gen_helper_sve_ldhdu_be_zd,
5723 gen_helper_sve_ldsdu_be_zd,
5724 gen_helper_sve_lddd_be_zd, } } },
5725
5726 /* First-fault */
5727 { { { gen_helper_sve_ldffbds_zsu,
5728 gen_helper_sve_ldffhds_be_zsu,
5729 gen_helper_sve_ldffsds_be_zsu,
5730 NULL, },
5731 { gen_helper_sve_ldffbdu_zsu,
5732 gen_helper_sve_ldffhdu_be_zsu,
5733 gen_helper_sve_ldffsdu_be_zsu,
5734 gen_helper_sve_ldffdd_be_zsu, } },
5735 { { gen_helper_sve_ldffbds_zss,
5736 gen_helper_sve_ldffhds_be_zss,
5737 gen_helper_sve_ldffsds_be_zss,
5738 NULL, },
5739 { gen_helper_sve_ldffbdu_zss,
5740 gen_helper_sve_ldffhdu_be_zss,
5741 gen_helper_sve_ldffsdu_be_zss,
5742 gen_helper_sve_ldffdd_be_zss, } },
5743 { { gen_helper_sve_ldffbds_zd,
5744 gen_helper_sve_ldffhds_be_zd,
5745 gen_helper_sve_ldffsds_be_zd,
5746 NULL, },
5747 { gen_helper_sve_ldffbdu_zd,
5748 gen_helper_sve_ldffhdu_be_zd,
5749 gen_helper_sve_ldffsdu_be_zd,
5750 gen_helper_sve_ldffdd_be_zd, } } } } },
5751 { /* MTE Active */
5752 { /* Little-endian */
5753 { { { gen_helper_sve_ldbds_zsu_mte,
5754 gen_helper_sve_ldhds_le_zsu_mte,
5755 gen_helper_sve_ldsds_le_zsu_mte,
5756 NULL, },
5757 { gen_helper_sve_ldbdu_zsu_mte,
5758 gen_helper_sve_ldhdu_le_zsu_mte,
5759 gen_helper_sve_ldsdu_le_zsu_mte,
5760 gen_helper_sve_lddd_le_zsu_mte, } },
5761 { { gen_helper_sve_ldbds_zss_mte,
5762 gen_helper_sve_ldhds_le_zss_mte,
5763 gen_helper_sve_ldsds_le_zss_mte,
5764 NULL, },
5765 { gen_helper_sve_ldbdu_zss_mte,
5766 gen_helper_sve_ldhdu_le_zss_mte,
5767 gen_helper_sve_ldsdu_le_zss_mte,
5768 gen_helper_sve_lddd_le_zss_mte, } },
5769 { { gen_helper_sve_ldbds_zd_mte,
5770 gen_helper_sve_ldhds_le_zd_mte,
5771 gen_helper_sve_ldsds_le_zd_mte,
5772 NULL, },
5773 { gen_helper_sve_ldbdu_zd_mte,
5774 gen_helper_sve_ldhdu_le_zd_mte,
5775 gen_helper_sve_ldsdu_le_zd_mte,
5776 gen_helper_sve_lddd_le_zd_mte, } } },
5777
5778 /* First-fault */
5779 { { { gen_helper_sve_ldffbds_zsu_mte,
5780 gen_helper_sve_ldffhds_le_zsu_mte,
5781 gen_helper_sve_ldffsds_le_zsu_mte,
5782 NULL, },
5783 { gen_helper_sve_ldffbdu_zsu_mte,
5784 gen_helper_sve_ldffhdu_le_zsu_mte,
5785 gen_helper_sve_ldffsdu_le_zsu_mte,
5786 gen_helper_sve_ldffdd_le_zsu_mte, } },
5787 { { gen_helper_sve_ldffbds_zss_mte,
5788 gen_helper_sve_ldffhds_le_zss_mte,
5789 gen_helper_sve_ldffsds_le_zss_mte,
5790 NULL, },
5791 { gen_helper_sve_ldffbdu_zss_mte,
5792 gen_helper_sve_ldffhdu_le_zss_mte,
5793 gen_helper_sve_ldffsdu_le_zss_mte,
5794 gen_helper_sve_ldffdd_le_zss_mte, } },
5795 { { gen_helper_sve_ldffbds_zd_mte,
5796 gen_helper_sve_ldffhds_le_zd_mte,
5797 gen_helper_sve_ldffsds_le_zd_mte,
5798 NULL, },
5799 { gen_helper_sve_ldffbdu_zd_mte,
5800 gen_helper_sve_ldffhdu_le_zd_mte,
5801 gen_helper_sve_ldffsdu_le_zd_mte,
5802 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5803 { /* Big-endian */
5804 { { { gen_helper_sve_ldbds_zsu_mte,
5805 gen_helper_sve_ldhds_be_zsu_mte,
5806 gen_helper_sve_ldsds_be_zsu_mte,
5807 NULL, },
5808 { gen_helper_sve_ldbdu_zsu_mte,
5809 gen_helper_sve_ldhdu_be_zsu_mte,
5810 gen_helper_sve_ldsdu_be_zsu_mte,
5811 gen_helper_sve_lddd_be_zsu_mte, } },
5812 { { gen_helper_sve_ldbds_zss_mte,
5813 gen_helper_sve_ldhds_be_zss_mte,
5814 gen_helper_sve_ldsds_be_zss_mte,
5815 NULL, },
5816 { gen_helper_sve_ldbdu_zss_mte,
5817 gen_helper_sve_ldhdu_be_zss_mte,
5818 gen_helper_sve_ldsdu_be_zss_mte,
5819 gen_helper_sve_lddd_be_zss_mte, } },
5820 { { gen_helper_sve_ldbds_zd_mte,
5821 gen_helper_sve_ldhds_be_zd_mte,
5822 gen_helper_sve_ldsds_be_zd_mte,
5823 NULL, },
5824 { gen_helper_sve_ldbdu_zd_mte,
5825 gen_helper_sve_ldhdu_be_zd_mte,
5826 gen_helper_sve_ldsdu_be_zd_mte,
5827 gen_helper_sve_lddd_be_zd_mte, } } },
5828
5829 /* First-fault */
5830 { { { gen_helper_sve_ldffbds_zsu_mte,
5831 gen_helper_sve_ldffhds_be_zsu_mte,
5832 gen_helper_sve_ldffsds_be_zsu_mte,
5833 NULL, },
5834 { gen_helper_sve_ldffbdu_zsu_mte,
5835 gen_helper_sve_ldffhdu_be_zsu_mte,
5836 gen_helper_sve_ldffsdu_be_zsu_mte,
5837 gen_helper_sve_ldffdd_be_zsu_mte, } },
5838 { { gen_helper_sve_ldffbds_zss_mte,
5839 gen_helper_sve_ldffhds_be_zss_mte,
5840 gen_helper_sve_ldffsds_be_zss_mte,
5841 NULL, },
5842 { gen_helper_sve_ldffbdu_zss_mte,
5843 gen_helper_sve_ldffhdu_be_zss_mte,
5844 gen_helper_sve_ldffsdu_be_zss_mte,
5845 gen_helper_sve_ldffdd_be_zss_mte, } },
5846 { { gen_helper_sve_ldffbds_zd_mte,
5847 gen_helper_sve_ldffhds_be_zd_mte,
5848 gen_helper_sve_ldffsds_be_zd_mte,
5849 NULL, },
5850 { gen_helper_sve_ldffbdu_zd_mte,
5851 gen_helper_sve_ldffhdu_be_zd_mte,
5852 gen_helper_sve_ldffsdu_be_zd_mte,
5853 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
5854};
5855
3a7be554 5856static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5857{
5858 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5859 bool be = s->be_data == MO_BE;
5860 bool mte = s->mte_active[0];
673e9fa6
RH
5861
5862 if (!sve_access_check(s)) {
5863 return true;
5864 }
5865
5866 switch (a->esz) {
5867 case MO_32:
d28d12f0 5868 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5869 break;
5870 case MO_64:
d28d12f0 5871 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5872 break;
5873 }
5874 assert(fn != NULL);
5875
5876 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5877 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5878 return true;
5879}
5880
3a7be554 5881static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5882{
5883 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5884 bool be = s->be_data == MO_BE;
5885 bool mte = s->mte_active[0];
673e9fa6
RH
5886 TCGv_i64 imm;
5887
5888 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5889 return false;
5890 }
5891 if (!sve_access_check(s)) {
5892 return true;
5893 }
5894
5895 switch (a->esz) {
5896 case MO_32:
d28d12f0 5897 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5898 break;
5899 case MO_64:
d28d12f0 5900 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5901 break;
5902 }
5903 assert(fn != NULL);
5904
5905 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5906 * by loading the immediate into the scalar parameter.
5907 */
5908 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 5909 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
673e9fa6
RH
5910 tcg_temp_free_i64(imm);
5911 return true;
5912}
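/*
 * E.g. for a doubleword gather, #imm is scaled by the 8-byte access
 * size, so imm == 3 becomes a 24-byte scalar addend applied to every
 * active element address taken from Zn.
 */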
5913
d28d12f0
RH
5914/* Indexed by [mte][be][xs][msz]. */
5915static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5916 { /* MTE Inactive */
5917 { /* Little-endian */
5918 { gen_helper_sve_stbs_zsu,
5919 gen_helper_sve_sths_le_zsu,
5920 gen_helper_sve_stss_le_zsu, },
5921 { gen_helper_sve_stbs_zss,
5922 gen_helper_sve_sths_le_zss,
5923 gen_helper_sve_stss_le_zss, } },
5924 { /* Big-endian */
5925 { gen_helper_sve_stbs_zsu,
5926 gen_helper_sve_sths_be_zsu,
5927 gen_helper_sve_stss_be_zsu, },
5928 { gen_helper_sve_stbs_zss,
5929 gen_helper_sve_sths_be_zss,
5930 gen_helper_sve_stss_be_zss, } } },
5931 { /* MTE Active */
5932 { /* Little-endian */
5933 { gen_helper_sve_stbs_zsu_mte,
5934 gen_helper_sve_sths_le_zsu_mte,
5935 gen_helper_sve_stss_le_zsu_mte, },
5936 { gen_helper_sve_stbs_zss_mte,
5937 gen_helper_sve_sths_le_zss_mte,
5938 gen_helper_sve_stss_le_zss_mte, } },
5939 { /* Big-endian */
5940 { gen_helper_sve_stbs_zsu_mte,
5941 gen_helper_sve_sths_be_zsu_mte,
5942 gen_helper_sve_stss_be_zsu_mte, },
5943 { gen_helper_sve_stbs_zss_mte,
5944 gen_helper_sve_sths_be_zss_mte,
5945 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
5946};
5947
5948/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5949static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5950 { /* MTE Inactive */
5951 { /* Little-endian */
5952 { gen_helper_sve_stbd_zsu,
5953 gen_helper_sve_sthd_le_zsu,
5954 gen_helper_sve_stsd_le_zsu,
5955 gen_helper_sve_stdd_le_zsu, },
5956 { gen_helper_sve_stbd_zss,
5957 gen_helper_sve_sthd_le_zss,
5958 gen_helper_sve_stsd_le_zss,
5959 gen_helper_sve_stdd_le_zss, },
5960 { gen_helper_sve_stbd_zd,
5961 gen_helper_sve_sthd_le_zd,
5962 gen_helper_sve_stsd_le_zd,
5963 gen_helper_sve_stdd_le_zd, } },
5964 { /* Big-endian */
5965 { gen_helper_sve_stbd_zsu,
5966 gen_helper_sve_sthd_be_zsu,
5967 gen_helper_sve_stsd_be_zsu,
5968 gen_helper_sve_stdd_be_zsu, },
5969 { gen_helper_sve_stbd_zss,
5970 gen_helper_sve_sthd_be_zss,
5971 gen_helper_sve_stsd_be_zss,
5972 gen_helper_sve_stdd_be_zss, },
5973 { gen_helper_sve_stbd_zd,
5974 gen_helper_sve_sthd_be_zd,
5975 gen_helper_sve_stsd_be_zd,
5976 gen_helper_sve_stdd_be_zd, } } },
5977 { /* MTE Active */
5978 { /* Little-endian */
5979 { gen_helper_sve_stbd_zsu_mte,
5980 gen_helper_sve_sthd_le_zsu_mte,
5981 gen_helper_sve_stsd_le_zsu_mte,
5982 gen_helper_sve_stdd_le_zsu_mte, },
5983 { gen_helper_sve_stbd_zss_mte,
5984 gen_helper_sve_sthd_le_zss_mte,
5985 gen_helper_sve_stsd_le_zss_mte,
5986 gen_helper_sve_stdd_le_zss_mte, },
5987 { gen_helper_sve_stbd_zd_mte,
5988 gen_helper_sve_sthd_le_zd_mte,
5989 gen_helper_sve_stsd_le_zd_mte,
5990 gen_helper_sve_stdd_le_zd_mte, } },
5991 { /* Big-endian */
5992 { gen_helper_sve_stbd_zsu_mte,
5993 gen_helper_sve_sthd_be_zsu_mte,
5994 gen_helper_sve_stsd_be_zsu_mte,
5995 gen_helper_sve_stdd_be_zsu_mte, },
5996 { gen_helper_sve_stbd_zss_mte,
5997 gen_helper_sve_sthd_be_zss_mte,
5998 gen_helper_sve_stsd_be_zss_mte,
5999 gen_helper_sve_stdd_be_zss_mte, },
6000 { gen_helper_sve_stbd_zd_mte,
6001 gen_helper_sve_sthd_be_zd_mte,
6002 gen_helper_sve_stsd_be_zd_mte,
6003 gen_helper_sve_stdd_be_zd_mte, } } },
408ecde9
RH
6004};
6005
3a7be554 6006static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 6007{
f6dbf62a 6008 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
6009 bool be = s->be_data == MO_BE;
6010 bool mte = s->mte_active[0];
f6dbf62a
RH
6011
6012 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6013 return false;
6014 }
6015 if (!sve_access_check(s)) {
6016 return true;
6017 }
6018 switch (a->esz) {
6019 case MO_32:
d28d12f0 6020 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
6021 break;
6022 case MO_64:
d28d12f0 6023 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
6024 break;
6025 default:
6026 g_assert_not_reached();
6027 }
6028 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 6029 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
6030 return true;
6031}
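/*
 * The msz == 0 && scale check above rejects a scaled byte scatter:
 * scaling an offset by a 1-byte access size would be meaningless, and
 * no such form is provided.
 */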
dec6cf6b 6032
3a7be554 6033static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
6034{
6035 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
6036 bool be = s->be_data == MO_BE;
6037 bool mte = s->mte_active[0];
408ecde9
RH
6038 TCGv_i64 imm;
6039
6040 if (a->esz < a->msz) {
6041 return false;
6042 }
6043 if (!sve_access_check(s)) {
6044 return true;
6045 }
6046
6047 switch (a->esz) {
6048 case MO_32:
d28d12f0 6049 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
6050 break;
6051 case MO_64:
d28d12f0 6052 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
6053 break;
6054 }
6055 assert(fn != NULL);
6056
6057 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6058 * by loading the immediate into the scalar parameter.
6059 */
6060 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 6061 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
408ecde9
RH
6062 tcg_temp_free_i64(imm);
6063 return true;
6064}
6065
dec6cf6b
RH
6066/*
6067 * Prefetches
6068 */
6069
3a7be554 6070static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
6071{
6072 /* Prefetch is a nop within QEMU. */
2f95a3b0 6073 (void)sve_access_check(s);
dec6cf6b
RH
6074 return true;
6075}
6076
3a7be554 6077static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
6078{
6079 if (a->rm == 31) {
6080 return false;
6081 }
6082 /* Prefetch is a nop within QEMU. */
2f95a3b0 6083 (void)sve_access_check(s);
dec6cf6b
RH
6084 return true;
6085}
a2103582
RH
6086
6087/*
6088 * Move Prefix
6089 *
6090 * TODO: The implementation so far could handle predicated merging movprfx.
6091 * The helper functions as written take an extra source register to
6092 * use in the operation, but the result is only written when predication
6093 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
6094 * to allow the final write back to the destination to be unconditional.
6095 * For predicated zeroing movprfx, we need to rearrange the helpers to
6096 * allow the final write back to zero inactives.
6097 *
6098 * In the meantime, just emit the moves.
6099 */
6100
3a7be554 6101static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
a2103582
RH
6102{
6103 return do_mov_z(s, a->rd, a->rn);
6104}
6105
3a7be554 6106static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
6107{
6108 if (sve_access_check(s)) {
6109 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
6110 }
6111 return true;
6112}
6113
3a7be554 6114static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
a2103582 6115{
60245996 6116 return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
a2103582 6117}
5dad1ba5
RH
6118
6119/*
6120 * SVE2 Integer Multiply - Unpredicated
6121 */
6122
6123static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
6124{
6125 if (!dc_isar_feature(aa64_sve2, s)) {
6126 return false;
6127 }
6128 if (sve_access_check(s)) {
6129 gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
6130 }
6131 return true;
6132}
6133
6134static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a,
6135 gen_helper_gvec_3 *fn)
6136{
6137 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6138 return false;
6139 }
6140 if (sve_access_check(s)) {
6141 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
6142 }
6143 return true;
6144}
6145
6146static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6147{
6148 static gen_helper_gvec_3 * const fns[4] = {
6149 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
6150 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
6151 };
6152 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6153}
6154
6155static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6156{
6157 static gen_helper_gvec_3 * const fns[4] = {
6158 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
6159 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
6160 };
6161 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6162}
6163
6164static bool trans_PMUL_zzz(DisasContext *s, arg_rrr_esz *a)
6165{
6166 return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b);
6167}
d4b1e59d
RH
6168
6169/*
6170 * SVE2 Integer - Predicated
6171 */
6172
6173static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
6174 gen_helper_gvec_4 *fn)
6175{
6176 if (!dc_isar_feature(aa64_sve2, s)) {
6177 return false;
6178 }
6179 return do_zpzz_ool(s, a, fn);
6180}
6181
6182static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6183{
6184 static gen_helper_gvec_4 * const fns[3] = {
6185 gen_helper_sve2_sadalp_zpzz_h,
6186 gen_helper_sve2_sadalp_zpzz_s,
6187 gen_helper_sve2_sadalp_zpzz_d,
6188 };
6189 if (a->esz == 0) {
6190 return false;
6191 }
6192 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6193}
6194
6195static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6196{
6197 static gen_helper_gvec_4 * const fns[3] = {
6198 gen_helper_sve2_uadalp_zpzz_h,
6199 gen_helper_sve2_uadalp_zpzz_s,
6200 gen_helper_sve2_uadalp_zpzz_d,
6201 };
6202 if (a->esz == 0) {
6203 return false;
6204 }
6205 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6206}
db366da8
RH
6207
6208/*
6209 * SVE2 integer unary operations (predicated)
6210 */
6211
6212static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a,
6213 gen_helper_gvec_3 *fn)
6214{
6215 if (!dc_isar_feature(aa64_sve2, s)) {
6216 return false;
6217 }
6218 return do_zpz_ool(s, a, fn);
6219}
6220
6221static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a)
6222{
6223 if (a->esz != 2) {
6224 return false;
6225 }
6226 return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s);
6227}
6228
6229static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a)
6230{
6231 if (a->esz != 2) {
6232 return false;
6233 }
6234 return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s);
6235}
6236
6237static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a)
6238{
6239 static gen_helper_gvec_3 * const fns[4] = {
6240 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
6241 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
6242 };
6243 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6244}
6245
6246static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a)
6247{
6248 static gen_helper_gvec_3 * const fns[4] = {
6249 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
6250 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
6251 };
6252 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6253}
45d9503d
RH
6254
6255#define DO_SVE2_ZPZZ(NAME, name) \
6256static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
6257{ \
6258 static gen_helper_gvec_4 * const fns[4] = { \
6259 gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
6260 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
6261 }; \
6262 return do_sve2_zpzz_ool(s, a, fns[a->esz]); \
6263}
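/*
 * For example, DO_SVE2_ZPZZ(SQSHL, sqshl) expands to roughly:
 *
 *   static bool trans_SQSHL(DisasContext *s, arg_rprr_esz *a)
 *   {
 *       static gen_helper_gvec_4 * const fns[4] = {
 *           gen_helper_sve2_sqshl_zpzz_b, gen_helper_sve2_sqshl_zpzz_h,
 *           gen_helper_sve2_sqshl_zpzz_s, gen_helper_sve2_sqshl_zpzz_d,
 *       };
 *       return do_sve2_zpzz_ool(s, a, fns[a->esz]);
 *   }
 */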
6264
6265DO_SVE2_ZPZZ(SQSHL, sqshl)
6266DO_SVE2_ZPZZ(SQRSHL, sqrshl)
6267DO_SVE2_ZPZZ(SRSHL, srshl)
6268
6269DO_SVE2_ZPZZ(UQSHL, uqshl)
6270DO_SVE2_ZPZZ(UQRSHL, uqrshl)
6271DO_SVE2_ZPZZ(URSHL, urshl)
a47dc220
RH
6272
6273DO_SVE2_ZPZZ(SHADD, shadd)
6274DO_SVE2_ZPZZ(SRHADD, srhadd)
6275DO_SVE2_ZPZZ(SHSUB, shsub)
6276
6277DO_SVE2_ZPZZ(UHADD, uhadd)
6278DO_SVE2_ZPZZ(URHADD, urhadd)
6279DO_SVE2_ZPZZ(UHSUB, uhsub)
8597dc8b
RH
6280
6281DO_SVE2_ZPZZ(ADDP, addp)
6282DO_SVE2_ZPZZ(SMAXP, smaxp)
6283DO_SVE2_ZPZZ(UMAXP, umaxp)
6284DO_SVE2_ZPZZ(SMINP, sminp)
6285DO_SVE2_ZPZZ(UMINP, uminp)
4f07fbeb
RH
6286
6287DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
6288DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
6289DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
6290DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
6291DO_SVE2_ZPZZ(SUQADD, suqadd)
6292DO_SVE2_ZPZZ(USQADD, usqadd)
0ce1dda8
RH
6293
6294/*
6295 * SVE2 Widening Integer Arithmetic
6296 */
6297
6298static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a,
6299 gen_helper_gvec_3 *fn, int data)
6300{
6301 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6302 return false;
6303 }
6304 if (sve_access_check(s)) {
6305 unsigned vsz = vec_full_reg_size(s);
6306 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
6307 vec_full_reg_offset(s, a->rn),
6308 vec_full_reg_offset(s, a->rm),
6309 vsz, vsz, data, fn);
6310 }
6311 return true;
6312}
6313
6314#define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \
6315static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
6316{ \
6317 static gen_helper_gvec_3 * const fns[4] = { \
6318 NULL, gen_helper_sve2_##name##_h, \
6319 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
6320 }; \
6321 return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1); \
6322}
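/*
 * The helper data here is (SEL2 << 1) | SEL1: bit 0 selects the even
 * (bottom) or odd (top) half-elements of Zn, bit 1 does the same for
 * Zm.  So SADDLB passes 0, SADDLT passes 3, and the mixed SADDLBT
 * below passes 2.
 */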
6323
6324DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false)
6325DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false)
6326DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false)
6327
6328DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false)
6329DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false)
6330DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false)
6331
6332DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true)
6333DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true)
6334DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true)
6335
6336DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true)
6337DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true)
6338DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true)
daec426b
RH
6339
6340DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true)
6341DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true)
6342DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false)
81fccf09 6343
69ccc099
RH
6344DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false)
6345DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true)
6346
6347DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false)
6348DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true)
6349
6350DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false)
6351DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true)
6352
2df3ca55
RH
6353static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1)
6354{
6355 static gen_helper_gvec_3 * const fns[4] = {
6356 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6357 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6358 };
6359 return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1);
6360}
6361
6362static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a)
6363{
6364 return do_eor_tb(s, a, false);
6365}
6366
6367static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a)
6368{
6369 return do_eor_tb(s, a, true);
6370}
6371
e3a56131
RH
6372static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6373{
6374 static gen_helper_gvec_3 * const fns[4] = {
6375 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6376 NULL, gen_helper_sve2_pmull_d,
6377 };
6378 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
6379 return false;
6380 }
6381 return do_sve2_zzw_ool(s, a, fns[a->esz], sel);
6382}
6383
6384static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a)
6385{
6386 return do_trans_pmull(s, a, false);
6387}
6388
6389static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a)
6390{
6391 return do_trans_pmull(s, a, true);
6392}
6393
81fccf09
RH
6394#define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \
6395static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
6396{ \
6397 static gen_helper_gvec_3 * const fns[4] = { \
6398 NULL, gen_helper_sve2_##name##_h, \
6399 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
6400 }; \
6401 return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2); \
6402}
6403
6404DO_SVE2_ZZZ_WTB(SADDWB, saddw, false)
6405DO_SVE2_ZZZ_WTB(SADDWT, saddw, true)
6406DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false)
6407DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true)
6408
6409DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false)
6410DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true)
6411DO_SVE2_ZZZ_WTB(USUBWB, usubw, false)
6412DO_SVE2_ZZZ_WTB(USUBWT, usubw, true)
4269fef1
RH
6413
6414static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6415{
6416 int top = imm & 1;
6417 int shl = imm >> 1;
6418 int halfbits = 4 << vece;
6419
6420 if (top) {
6421 if (shl == halfbits) {
6422 TCGv_vec t = tcg_temp_new_vec_matching(d);
6423 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6424 tcg_gen_and_vec(vece, d, n, t);
6425 tcg_temp_free_vec(t);
6426 } else {
6427 tcg_gen_sari_vec(vece, d, n, halfbits);
6428 tcg_gen_shli_vec(vece, d, d, shl);
6429 }
6430 } else {
6431 tcg_gen_shli_vec(vece, d, n, halfbits);
6432 tcg_gen_sari_vec(vece, d, d, halfbits - shl);
6433 }
6434}
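/*
 * Note the shl == halfbits shortcut above: shifting the sign-extended
 * top half left by a full half-element puts its bits back exactly
 * where they started, so the result is simply the element with its
 * bottom half cleared, hence the single AND.
 */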
6435
6436static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
6437{
6438 int halfbits = 4 << vece;
6439 int top = imm & 1;
6440 int shl = (imm >> 1);
6441 int shift;
6442 uint64_t mask;
6443
6444 mask = MAKE_64BIT_MASK(0, halfbits);
6445 mask <<= shl;
6446 mask = dup_const(vece, mask);
6447
6448 shift = shl - top * halfbits;
6449 if (shift < 0) {
6450 tcg_gen_shri_i64(d, n, -shift);
6451 } else {
6452 tcg_gen_shli_i64(d, n, shift);
6453 }
6454 tcg_gen_andi_i64(d, d, mask);
6455}
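/*
 * Worked example for vece == MO_16: USHLLT with shl == 3 gives
 * shift = 3 - 8 = -5, so the 64-bit chunk is shifted right by 5 and
 * masked with dup16(0xff << 3) == dup16(0x07f8).  That leaves the odd
 * source byte of each 16-bit lane zero-extended and shifted left by 3,
 * while the bits that leaked in from the neighbouring lane are cleared
 * by the mask.
 */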
6456
6457static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6458{
6459 gen_ushll_i64(MO_16, d, n, imm);
6460}
6461
6462static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6463{
6464 gen_ushll_i64(MO_32, d, n, imm);
6465}
6466
6467static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6468{
6469 gen_ushll_i64(MO_64, d, n, imm);
6470}
6471
6472static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6473{
6474 int halfbits = 4 << vece;
6475 int top = imm & 1;
6476 int shl = imm >> 1;
6477
6478 if (top) {
6479 if (shl == halfbits) {
6480 TCGv_vec t = tcg_temp_new_vec_matching(d);
6481 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6482 tcg_gen_and_vec(vece, d, n, t);
6483 tcg_temp_free_vec(t);
6484 } else {
6485 tcg_gen_shri_vec(vece, d, n, halfbits);
6486 tcg_gen_shli_vec(vece, d, d, shl);
6487 }
6488 } else {
6489 if (shl == 0) {
6490 TCGv_vec t = tcg_temp_new_vec_matching(d);
6491 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6492 tcg_gen_and_vec(vece, d, n, t);
6493 tcg_temp_free_vec(t);
6494 } else {
6495 tcg_gen_shli_vec(vece, d, n, halfbits);
6496 tcg_gen_shri_vec(vece, d, d, halfbits - shl);
6497 }
6498 }
6499}
6500
6501static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
6502 bool sel, bool uns)
6503{
6504 static const TCGOpcode sshll_list[] = {
6505 INDEX_op_shli_vec, INDEX_op_sari_vec, 0
6506 };
6507 static const TCGOpcode ushll_list[] = {
6508 INDEX_op_shli_vec, INDEX_op_shri_vec, 0
6509 };
6510 static const GVecGen2i ops[2][3] = {
6511 { { .fniv = gen_sshll_vec,
6512 .opt_opc = sshll_list,
6513 .fno = gen_helper_sve2_sshll_h,
6514 .vece = MO_16 },
6515 { .fniv = gen_sshll_vec,
6516 .opt_opc = sshll_list,
6517 .fno = gen_helper_sve2_sshll_s,
6518 .vece = MO_32 },
6519 { .fniv = gen_sshll_vec,
6520 .opt_opc = sshll_list,
6521 .fno = gen_helper_sve2_sshll_d,
6522 .vece = MO_64 } },
6523 { { .fni8 = gen_ushll16_i64,
6524 .fniv = gen_ushll_vec,
6525 .opt_opc = ushll_list,
6526 .fno = gen_helper_sve2_ushll_h,
6527 .vece = MO_16 },
6528 { .fni8 = gen_ushll32_i64,
6529 .fniv = gen_ushll_vec,
6530 .opt_opc = ushll_list,
6531 .fno = gen_helper_sve2_ushll_s,
6532 .vece = MO_32 },
6533 { .fni8 = gen_ushll64_i64,
6534 .fniv = gen_ushll_vec,
6535 .opt_opc = ushll_list,
6536 .fno = gen_helper_sve2_ushll_d,
6537 .vece = MO_64 } },
6538 };
6539
6540 if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
6541 return false;
6542 }
6543 if (sve_access_check(s)) {
6544 unsigned vsz = vec_full_reg_size(s);
6545 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6546 vec_full_reg_offset(s, a->rn),
6547 vsz, vsz, (a->imm << 1) | sel,
6548 &ops[uns][a->esz]);
6549 }
6550 return true;
6551}
6552
6553static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
6554{
6555 return do_sve2_shll_tb(s, a, false, false);
6556}
6557
6558static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
6559{
6560 return do_sve2_shll_tb(s, a, true, false);
6561}
6562
6563static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
6564{
6565 return do_sve2_shll_tb(s, a, false, true);
6566}
6567
6568static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
6569{
6570 return do_sve2_shll_tb(s, a, true, true);
6571}
cb9c33b8
RH
6572
6573static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a)
6574{
6575 static gen_helper_gvec_3 * const fns[4] = {
6576 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
6577 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
6578 };
6579 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
6580 return false;
6581 }
6582 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
6583}
6584
6585static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a)
6586{
6587 static gen_helper_gvec_3 * const fns[4] = {
6588 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
6589 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
6590 };
6591 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
6592 return false;
6593 }
6594 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
6595}
6596
6597static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a)
6598{
6599 static gen_helper_gvec_3 * const fns[4] = {
6600 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
6601 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
6602 };
6603 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
6604 return false;
6605 }
6606 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
6607}
ed4a6387
RH
6608
6609static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot)
6610{
6611 static gen_helper_gvec_3 * const fns[2][4] = {
6612 { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
6613 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d },
6614 { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
6615 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d },
6616 };
6617 return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot);
6618}
6619
6620static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a)
6621{
6622 return do_cadd(s, a, false, false);
6623}
6624
6625static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a)
6626{
6627 return do_cadd(s, a, false, true);
6628}
6629
6630static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a)
6631{
6632 return do_cadd(s, a, true, false);
6633}
6634
6635static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
6636{
6637 return do_cadd(s, a, true, true);
6638}
38650638
RH
6639
6640static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
6641 gen_helper_gvec_4 *fn, int data)
6642{
6643 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6644 return false;
6645 }
6646 if (sve_access_check(s)) {
6647 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
6648 }
6649 return true;
6650}
6651
6652static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
6653{
6654 static gen_helper_gvec_4 * const fns[2][4] = {
6655 { NULL, gen_helper_sve2_sabal_h,
6656 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d },
6657 { NULL, gen_helper_sve2_uabal_h,
6658 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d },
6659 };
6660 return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel);
6661}
6662
6663static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
6664{
6665 return do_abal(s, a, false, false);
6666}
6667
6668static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
6669{
6670 return do_abal(s, a, false, true);
6671}
6672
6673static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
6674{
6675 return do_abal(s, a, true, false);
6676}
6677
6678static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
6679{
6680 return do_abal(s, a, true, true);
6681}
b8295dfb
RH
6682
6683static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
6684{
6685 static gen_helper_gvec_4 * const fns[2] = {
6686 gen_helper_sve2_adcl_s,
6687 gen_helper_sve2_adcl_d,
6688 };
6689 /*
6690 * Note that in this case the ESZ field encodes both size and sign.
6691 * Split out 'subtract' into bit 1 of the data field for the helper.
6692 */
6693 return do_sve2_zzzz_ool(s, a, fns[a->esz & 1], (a->esz & 2) | sel);
6694}
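/*
 * In other words, a->esz values 0..3 select {32-bit add, 64-bit add,
 * 32-bit subtract, 64-bit subtract}: bit 0 picks the helper width and
 * bit 1 is forwarded as the subtract flag, alongside the top/bottom
 * select in bit 0 of the data.
 */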
6695
6696static bool trans_ADCLB(DisasContext *s, arg_rrrr_esz *a)
6697{
6698 return do_adcl(s, a, false);
6699}
6700
6701static bool trans_ADCLT(DisasContext *s, arg_rrrr_esz *a)
6702{
6703 return do_adcl(s, a, true);
6704}
a7e3a90e
RH
6705
6706static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
6707{
6708 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
6709 return false;
6710 }
6711 if (sve_access_check(s)) {
6712 unsigned vsz = vec_full_reg_size(s);
6713 unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
6714 unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
6715 fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
6716 }
6717 return true;
6718}
6719
6720static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
6721{
6722 return do_sve2_fn2i(s, a, gen_gvec_ssra);
6723}
6724
6725static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
6726{
6727 return do_sve2_fn2i(s, a, gen_gvec_usra);
6728}
6729
6730static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
6731{
6732 return do_sve2_fn2i(s, a, gen_gvec_srsra);
6733}
6734
6735static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
6736{
6737 return do_sve2_fn2i(s, a, gen_gvec_ursra);
6738}
fc12b46a
RH
6739
6740static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
6741{
6742 return do_sve2_fn2i(s, a, gen_gvec_sri);
6743}
6744
6745static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
6746{
6747 return do_sve2_fn2i(s, a, gen_gvec_sli);
6748}
289a1797
RH
6749
6750static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
6751{
6752 if (!dc_isar_feature(aa64_sve2, s)) {
6753 return false;
6754 }
6755 if (sve_access_check(s)) {
6756 gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
6757 }
6758 return true;
6759}
6760
6761static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
6762{
6763 return do_sve2_fn_zzz(s, a, gen_gvec_saba);
6764}
6765
6766static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
6767{
6768 return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
6769}
5ff2838d
RH
6770
6771static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
6772 const GVecGen2 ops[3])
6773{
6774 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
6775 !dc_isar_feature(aa64_sve2, s)) {
6776 return false;
6777 }
6778 if (sve_access_check(s)) {
6779 unsigned vsz = vec_full_reg_size(s);
6780 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
6781 vec_full_reg_offset(s, a->rn),
6782 vsz, vsz, &ops[a->esz]);
6783 }
6784 return true;
6785}
6786
6787static const TCGOpcode sqxtn_list[] = {
6788 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
6789};
6790
6791static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6792{
6793 TCGv_vec t = tcg_temp_new_vec_matching(d);
6794 int halfbits = 4 << vece;
6795 int64_t mask = (1ull << halfbits) - 1;
6796 int64_t min = -1ull << (halfbits - 1);
6797 int64_t max = -min - 1;
6798
6799 tcg_gen_dupi_vec(vece, t, min);
6800 tcg_gen_smax_vec(vece, d, n, t);
6801 tcg_gen_dupi_vec(vece, t, max);
6802 tcg_gen_smin_vec(vece, d, d, t);
6803 tcg_gen_dupi_vec(vece, t, mask);
6804 tcg_gen_and_vec(vece, d, d, t);
6805 tcg_temp_free_vec(t);
6806}
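/*
 * E.g. for vece == MO_16 this clamps each 16-bit source lane to
 * [-128, 127] and then masks with 0x00ff, so the even (bottom) bytes
 * of the destination hold the saturated result and the odd bytes are
 * zero.
 */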
6807
6808static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
6809{
6810 static const GVecGen2 ops[3] = {
6811 { .fniv = gen_sqxtnb_vec,
6812 .opt_opc = sqxtn_list,
6813 .fno = gen_helper_sve2_sqxtnb_h,
6814 .vece = MO_16 },
6815 { .fniv = gen_sqxtnb_vec,
6816 .opt_opc = sqxtn_list,
6817 .fno = gen_helper_sve2_sqxtnb_s,
6818 .vece = MO_32 },
6819 { .fniv = gen_sqxtnb_vec,
6820 .opt_opc = sqxtn_list,
6821 .fno = gen_helper_sve2_sqxtnb_d,
6822 .vece = MO_64 },
6823 };
6824 return do_sve2_narrow_extract(s, a, ops);
6825}
6826
6827static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6828{
6829 TCGv_vec t = tcg_temp_new_vec_matching(d);
6830 int halfbits = 4 << vece;
6831 int64_t mask = (1ull << halfbits) - 1;
6832 int64_t min = -1ull << (halfbits - 1);
6833 int64_t max = -min - 1;
6834
6835 tcg_gen_dupi_vec(vece, t, min);
6836 tcg_gen_smax_vec(vece, n, n, t);
6837 tcg_gen_dupi_vec(vece, t, max);
6838 tcg_gen_smin_vec(vece, n, n, t);
6839 tcg_gen_shli_vec(vece, n, n, halfbits);
6840 tcg_gen_dupi_vec(vece, t, mask);
6841 tcg_gen_bitsel_vec(vece, d, t, d, n);
6842 tcg_temp_free_vec(t);
6843}
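/*
 * The bitsel above computes (d & t) | (n & ~t): the low-half mask t
 * preserves the even (bottom) halves already in the destination,
 * while the saturated result, shifted up by halfbits, lands in the
 * odd (top) halves.
 */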
6844
6845static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
6846{
6847 static const GVecGen2 ops[3] = {
6848 { .fniv = gen_sqxtnt_vec,
6849 .opt_opc = sqxtn_list,
6850 .load_dest = true,
6851 .fno = gen_helper_sve2_sqxtnt_h,
6852 .vece = MO_16 },
6853 { .fniv = gen_sqxtnt_vec,
6854 .opt_opc = sqxtn_list,
6855 .load_dest = true,
6856 .fno = gen_helper_sve2_sqxtnt_s,
6857 .vece = MO_32 },
6858 { .fniv = gen_sqxtnt_vec,
6859 .opt_opc = sqxtn_list,
6860 .load_dest = true,
6861 .fno = gen_helper_sve2_sqxtnt_d,
6862 .vece = MO_64 },
6863 };
6864 return do_sve2_narrow_extract(s, a, ops);
6865}
6866
6867static const TCGOpcode uqxtn_list[] = {
6868 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
6869};
6870
6871static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6872{
6873 TCGv_vec t = tcg_temp_new_vec_matching(d);
6874 int halfbits = 4 << vece;
6875 int64_t max = (1ull << halfbits) - 1;
6876
6877 tcg_gen_dupi_vec(vece, t, max);
6878 tcg_gen_umin_vec(vece, d, n, t);
6879 tcg_temp_free_vec(t);
6880}
6881
6882static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
6883{
6884 static const GVecGen2 ops[3] = {
6885 { .fniv = gen_uqxtnb_vec,
6886 .opt_opc = uqxtn_list,
6887 .fno = gen_helper_sve2_uqxtnb_h,
6888 .vece = MO_16 },
6889 { .fniv = gen_uqxtnb_vec,
6890 .opt_opc = uqxtn_list,
6891 .fno = gen_helper_sve2_uqxtnb_s,
6892 .vece = MO_32 },
6893 { .fniv = gen_uqxtnb_vec,
6894 .opt_opc = uqxtn_list,
6895 .fno = gen_helper_sve2_uqxtnb_d,
6896 .vece = MO_64 },
6897 };
6898 return do_sve2_narrow_extract(s, a, ops);
6899}
6900
6901static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6902{
6903 TCGv_vec t = tcg_temp_new_vec_matching(d);
6904 int halfbits = 4 << vece;
6905 int64_t max = (1ull << halfbits) - 1;
6906
6907 tcg_gen_dupi_vec(vece, t, max);
6908 tcg_gen_umin_vec(vece, n, n, t);
6909 tcg_gen_shli_vec(vece, n, n, halfbits);
6910 tcg_gen_bitsel_vec(vece, d, t, d, n);
6911 tcg_temp_free_vec(t);
6912}
6913
6914static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
6915{
6916 static const GVecGen2 ops[3] = {
6917 { .fniv = gen_uqxtnt_vec,
6918 .opt_opc = uqxtn_list,
6919 .load_dest = true,
6920 .fno = gen_helper_sve2_uqxtnt_h,
6921 .vece = MO_16 },
6922 { .fniv = gen_uqxtnt_vec,
6923 .opt_opc = uqxtn_list,
6924 .load_dest = true,
6925 .fno = gen_helper_sve2_uqxtnt_s,
6926 .vece = MO_32 },
6927 { .fniv = gen_uqxtnt_vec,
6928 .opt_opc = uqxtn_list,
6929 .load_dest = true,
6930 .fno = gen_helper_sve2_uqxtnt_d,
6931 .vece = MO_64 },
6932 };
6933 return do_sve2_narrow_extract(s, a, ops);
6934}
6935
6936static const TCGOpcode sqxtun_list[] = {
6937 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
6938};
6939
6940static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6941{
6942 TCGv_vec t = tcg_temp_new_vec_matching(d);
6943 int halfbits = 4 << vece;
6944 int64_t max = (1ull << halfbits) - 1;
6945
6946 tcg_gen_dupi_vec(vece, t, 0);
6947 tcg_gen_smax_vec(vece, d, n, t);
6948 tcg_gen_dupi_vec(vece, t, max);
6949 tcg_gen_umin_vec(vece, d, d, t);
6950 tcg_temp_free_vec(t);
6951}
6952
6953static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
6954{
6955 static const GVecGen2 ops[3] = {
6956 { .fniv = gen_sqxtunb_vec,
6957 .opt_opc = sqxtun_list,
6958 .fno = gen_helper_sve2_sqxtunb_h,
6959 .vece = MO_16 },
6960 { .fniv = gen_sqxtunb_vec,
6961 .opt_opc = sqxtun_list,
6962 .fno = gen_helper_sve2_sqxtunb_s,
6963 .vece = MO_32 },
6964 { .fniv = gen_sqxtunb_vec,
6965 .opt_opc = sqxtun_list,
6966 .fno = gen_helper_sve2_sqxtunb_d,
6967 .vece = MO_64 },
6968 };
6969 return do_sve2_narrow_extract(s, a, ops);
6970}
6971
6972static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6973{
6974 TCGv_vec t = tcg_temp_new_vec_matching(d);
6975 int halfbits = 4 << vece;
6976 int64_t max = (1ull << halfbits) - 1;
6977
6978 tcg_gen_dupi_vec(vece, t, 0);
6979 tcg_gen_smax_vec(vece, n, n, t);
6980 tcg_gen_dupi_vec(vece, t, max);
6981 tcg_gen_umin_vec(vece, n, n, t);
6982 tcg_gen_shli_vec(vece, n, n, halfbits);
6983 tcg_gen_bitsel_vec(vece, d, t, d, n);
6984 tcg_temp_free_vec(t);
6985}
6986
6987static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
6988{
6989 static const GVecGen2 ops[3] = {
6990 { .fniv = gen_sqxtunt_vec,
6991 .opt_opc = sqxtun_list,
6992 .load_dest = true,
6993 .fno = gen_helper_sve2_sqxtunt_h,
6994 .vece = MO_16 },
6995 { .fniv = gen_sqxtunt_vec,
6996 .opt_opc = sqxtun_list,
6997 .load_dest = true,
6998 .fno = gen_helper_sve2_sqxtunt_s,
6999 .vece = MO_32 },
7000 { .fniv = gen_sqxtunt_vec,
7001 .opt_opc = sqxtun_list,
7002 .load_dest = true,
7003 .fno = gen_helper_sve2_sqxtunt_d,
7004 .vece = MO_64 },
7005 };
7006 return do_sve2_narrow_extract(s, a, ops);
46d111b2
RH
7007}
7008
7009static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
7010 const GVecGen2i ops[3])
7011{
7012 if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
7013 return false;
7014 }
7015 assert(a->imm > 0 && a->imm <= (8 << a->esz));
7016 if (sve_access_check(s)) {
7017 unsigned vsz = vec_full_reg_size(s);
7018 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
7019 vec_full_reg_offset(s, a->rn),
7020 vsz, vsz, a->imm, &ops[a->esz]);
7021 }
7022 return true;
7023}
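/*
 * The assert reflects the encoding: the shift amount runs from 1 up
 * to the width of the narrow destination element (8 << esz bits),
 * which is half the width of the source element being narrowed.
 */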
7024
7025static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
7026{
7027 int halfbits = 4 << vece;
7028 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
7029
7030 tcg_gen_shri_i64(d, n, shr);
7031 tcg_gen_andi_i64(d, d, mask);
7032}
7033
7034static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7035{
7036 gen_shrnb_i64(MO_16, d, n, shr);
7037}
7038
7039static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7040{
7041 gen_shrnb_i64(MO_32, d, n, shr);
7042}
7043
7044static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7045{
7046 gen_shrnb_i64(MO_64, d, n, shr);
7047}
7048
7049static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
7050{
7051 TCGv_vec t = tcg_temp_new_vec_matching(d);
7052 int halfbits = 4 << vece;
7053 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
7054
7055 tcg_gen_shri_vec(vece, n, n, shr);
7056 tcg_gen_dupi_vec(vece, t, mask);
7057 tcg_gen_and_vec(vece, d, n, t);
7058 tcg_temp_free_vec(t);
7059}
7060
7061static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
7062{
7063 static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
7064 static const GVecGen2i ops[3] = {
7065 { .fni8 = gen_shrnb16_i64,
7066 .fniv = gen_shrnb_vec,
7067 .opt_opc = vec_list,
7068 .fno = gen_helper_sve2_shrnb_h,
7069 .vece = MO_16 },
7070 { .fni8 = gen_shrnb32_i64,
7071 .fniv = gen_shrnb_vec,
7072 .opt_opc = vec_list,
7073 .fno = gen_helper_sve2_shrnb_s,
7074 .vece = MO_32 },
7075 { .fni8 = gen_shrnb64_i64,
7076 .fniv = gen_shrnb_vec,
7077 .opt_opc = vec_list,
7078 .fno = gen_helper_sve2_shrnb_d,
7079 .vece = MO_64 },
7080 };
7081 return do_sve2_shr_narrow(s, a, ops);
7082}
7083
7084static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
7085{
7086 int halfbits = 4 << vece;
7087 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
7088
7089 tcg_gen_shli_i64(n, n, halfbits - shr);
7090 tcg_gen_andi_i64(n, n, ~mask);
7091 tcg_gen_andi_i64(d, d, mask);
7092 tcg_gen_or_i64(d, d, n);
7093}
7094
7095static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7096{
7097 gen_shrnt_i64(MO_16, d, n, shr);
7098}
7099
7100static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7101{
7102 gen_shrnt_i64(MO_32, d, n, shr);
7103}
7104
7105static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7106{
7107 tcg_gen_shri_i64(n, n, shr);
7108 tcg_gen_deposit_i64(d, d, n, 32, 32);
7109}
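/*
 * For the 64-bit source case the deposit writes the shifted 32-bit
 * result directly into bits [63:32] of the destination lane, which is
 * exactly the "top" placement; the mask juggling used by the narrower
 * cases is unnecessary here.
 */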
7110
7111static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
7112{
7113 TCGv_vec t = tcg_temp_new_vec_matching(d);
7114 int halfbits = 4 << vece;
7115 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
7116
7117 tcg_gen_shli_vec(vece, n, n, halfbits - shr);
7118 tcg_gen_dupi_vec(vece, t, mask);
7119 tcg_gen_bitsel_vec(vece, d, t, d, n);
7120 tcg_temp_free_vec(t);
7121}
7122
7123static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
7124{
7125 static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
7126 static const GVecGen2i ops[3] = {
7127 { .fni8 = gen_shrnt16_i64,
7128 .fniv = gen_shrnt_vec,
7129 .opt_opc = vec_list,
7130 .load_dest = true,
7131 .fno = gen_helper_sve2_shrnt_h,
7132 .vece = MO_16 },
7133 { .fni8 = gen_shrnt32_i64,
7134 .fniv = gen_shrnt_vec,
7135 .opt_opc = vec_list,
7136 .load_dest = true,
7137 .fno = gen_helper_sve2_shrnt_s,
7138 .vece = MO_32 },
7139 { .fni8 = gen_shrnt64_i64,
7140 .fniv = gen_shrnt_vec,
7141 .opt_opc = vec_list,
7142 .load_dest = true,
7143 .fno = gen_helper_sve2_shrnt_d,
7144 .vece = MO_64 },
7145 };
7146 return do_sve2_shr_narrow(s, a, ops);
7147}
7148
7149static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
7150{
7151 static const GVecGen2i ops[3] = {
7152 { .fno = gen_helper_sve2_rshrnb_h },
7153 { .fno = gen_helper_sve2_rshrnb_s },
7154 { .fno = gen_helper_sve2_rshrnb_d },
7155 };
7156 return do_sve2_shr_narrow(s, a, ops);
7157}
7158
7159static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
7160{
7161 static const GVecGen2i ops[3] = {
7162 { .fno = gen_helper_sve2_rshrnt_h },
7163 { .fno = gen_helper_sve2_rshrnt_s },
7164 { .fno = gen_helper_sve2_rshrnt_d },
7165 };
7166 return do_sve2_shr_narrow(s, a, ops);
81fd3e6e
RH
7167}
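/*
 * The rounding forms supply only .fno entries, so tcg_gen_gvec_2i
 * always expands them out of line; the round-then-narrow step has no
 * convenient inline expression with the generic vector ops.
 */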
7168
7169static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
7170 TCGv_vec n, int64_t shr)
7171{
7172 TCGv_vec t = tcg_temp_new_vec_matching(d);
7173 int halfbits = 4 << vece;
7174
7175 tcg_gen_sari_vec(vece, n, n, shr);
7176 tcg_gen_dupi_vec(vece, t, 0);
7177 tcg_gen_smax_vec(vece, n, n, t);
7178 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7179 tcg_gen_umin_vec(vece, d, n, t);
7180 tcg_temp_free_vec(t);
7181}
7182
7183static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
7184{
7185 static const TCGOpcode vec_list[] = {
7186 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7187 };
7188 static const GVecGen2i ops[3] = {
7189 { .fniv = gen_sqshrunb_vec,
7190 .opt_opc = vec_list,
7191 .fno = gen_helper_sve2_sqshrunb_h,
7192 .vece = MO_16 },
7193 { .fniv = gen_sqshrunb_vec,
7194 .opt_opc = vec_list,
7195 .fno = gen_helper_sve2_sqshrunb_s,
7196 .vece = MO_32 },
7197 { .fniv = gen_sqshrunb_vec,
7198 .opt_opc = vec_list,
7199 .fno = gen_helper_sve2_sqshrunb_d,
7200 .vece = MO_64 },
7201 };
7202 return do_sve2_shr_narrow(s, a, ops);
7203}
7204
7205static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
7206 TCGv_vec n, int64_t shr)
7207{
7208 TCGv_vec t = tcg_temp_new_vec_matching(d);
7209 int halfbits = 4 << vece;
7210
7211 tcg_gen_sari_vec(vece, n, n, shr);
7212 tcg_gen_dupi_vec(vece, t, 0);
7213 tcg_gen_smax_vec(vece, n, n, t);
7214 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7215 tcg_gen_umin_vec(vece, n, n, t);
7216 tcg_gen_shli_vec(vece, n, n, halfbits);
7217 tcg_gen_bitsel_vec(vece, d, t, d, n);
7218 tcg_temp_free_vec(t);
7219}
7220
7221static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
7222{
7223 static const TCGOpcode vec_list[] = {
7224 INDEX_op_shli_vec, INDEX_op_sari_vec,
7225 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7226 };
7227 static const GVecGen2i ops[3] = {
7228 { .fniv = gen_sqshrunt_vec,
7229 .opt_opc = vec_list,
7230 .load_dest = true,
7231 .fno = gen_helper_sve2_sqshrunt_h,
7232 .vece = MO_16 },
7233 { .fniv = gen_sqshrunt_vec,
7234 .opt_opc = vec_list,
7235 .load_dest = true,
7236 .fno = gen_helper_sve2_sqshrunt_s,
7237 .vece = MO_32 },
7238 { .fniv = gen_sqshrunt_vec,
7239 .opt_opc = vec_list,
7240 .load_dest = true,
7241 .fno = gen_helper_sve2_sqshrunt_d,
7242 .vece = MO_64 },
7243 };
7244 return do_sve2_shr_narrow(s, a, ops);
7245}
7246
7247static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
7248{
7249 static const GVecGen2i ops[3] = {
7250 { .fno = gen_helper_sve2_sqrshrunb_h },
7251 { .fno = gen_helper_sve2_sqrshrunb_s },
7252 { .fno = gen_helper_sve2_sqrshrunb_d },
7253 };
7254 return do_sve2_shr_narrow(s, a, ops);
7255}
7256
7257static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
7258{
7259 static const GVecGen2i ops[3] = {
7260 { .fno = gen_helper_sve2_sqrshrunt_h },
7261 { .fno = gen_helper_sve2_sqrshrunt_s },
7262 { .fno = gen_helper_sve2_sqrshrunt_d },
7263 };
7264 return do_sve2_shr_narrow(s, a, ops);
c13418da
RH
7265}
7266
743bb147
RH
7267static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
7268 TCGv_vec n, int64_t shr)
7269{
7270 TCGv_vec t = tcg_temp_new_vec_matching(d);
7271 int halfbits = 4 << vece;
7272 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7273 int64_t min = -max - 1;
7274
7275 tcg_gen_sari_vec(vece, n, n, shr);
7276 tcg_gen_dupi_vec(vece, t, min);
7277 tcg_gen_smax_vec(vece, n, n, t);
7278 tcg_gen_dupi_vec(vece, t, max);
7279 tcg_gen_smin_vec(vece, n, n, t);
7280 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7281 tcg_gen_and_vec(vece, d, n, t);
7282 tcg_temp_free_vec(t);
7283}
7284
7285static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
7286{
7287 static const TCGOpcode vec_list[] = {
7288 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7289 };
7290 static const GVecGen2i ops[3] = {
7291 { .fniv = gen_sqshrnb_vec,
7292 .opt_opc = vec_list,
7293 .fno = gen_helper_sve2_sqshrnb_h,
7294 .vece = MO_16 },
7295 { .fniv = gen_sqshrnb_vec,
7296 .opt_opc = vec_list,
7297 .fno = gen_helper_sve2_sqshrnb_s,
7298 .vece = MO_32 },
7299 { .fniv = gen_sqshrnb_vec,
7300 .opt_opc = vec_list,
7301 .fno = gen_helper_sve2_sqshrnb_d,
7302 .vece = MO_64 },
7303 };
7304 return do_sve2_shr_narrow(s, a, ops);
7305}
7306
7307static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
7308 TCGv_vec n, int64_t shr)
7309{
7310 TCGv_vec t = tcg_temp_new_vec_matching(d);
7311 int halfbits = 4 << vece;
7312 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7313 int64_t min = -max - 1;
7314
7315 tcg_gen_sari_vec(vece, n, n, shr);
7316 tcg_gen_dupi_vec(vece, t, min);
7317 tcg_gen_smax_vec(vece, n, n, t);
7318 tcg_gen_dupi_vec(vece, t, max);
7319 tcg_gen_smin_vec(vece, n, n, t);
7320 tcg_gen_shli_vec(vece, n, n, halfbits);
7321 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7322 tcg_gen_bitsel_vec(vece, d, t, d, n);
7323 tcg_temp_free_vec(t);
7324}
7325
7326static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
7327{
7328 static const TCGOpcode vec_list[] = {
7329 INDEX_op_shli_vec, INDEX_op_sari_vec,
7330 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7331 };
7332 static const GVecGen2i ops[3] = {
7333 { .fniv = gen_sqshrnt_vec,
7334 .opt_opc = vec_list,
7335 .load_dest = true,
7336 .fno = gen_helper_sve2_sqshrnt_h,
7337 .vece = MO_16 },
7338 { .fniv = gen_sqshrnt_vec,
7339 .opt_opc = vec_list,
7340 .load_dest = true,
7341 .fno = gen_helper_sve2_sqshrnt_s,
7342 .vece = MO_32 },
7343 { .fniv = gen_sqshrnt_vec,
7344 .opt_opc = vec_list,
7345 .load_dest = true,
7346 .fno = gen_helper_sve2_sqshrnt_d,
7347 .vece = MO_64 },
7348 };
7349 return do_sve2_shr_narrow(s, a, ops);
7350}
7351
7352static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
7353{
7354 static const GVecGen2i ops[3] = {
7355 { .fno = gen_helper_sve2_sqrshrnb_h },
7356 { .fno = gen_helper_sve2_sqrshrnb_s },
7357 { .fno = gen_helper_sve2_sqrshrnb_d },
7358 };
7359 return do_sve2_shr_narrow(s, a, ops);
7360}
7361
7362static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
7363{
7364 static const GVecGen2i ops[3] = {
7365 { .fno = gen_helper_sve2_sqrshrnt_h },
7366 { .fno = gen_helper_sve2_sqrshrnt_s },
7367 { .fno = gen_helper_sve2_sqrshrnt_d },
7368 };
7369 return do_sve2_shr_narrow(s, a, ops);
7370}
7371
c13418da
RH
7372static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
7373 TCGv_vec n, int64_t shr)
7374{
7375 TCGv_vec t = tcg_temp_new_vec_matching(d);
7376 int halfbits = 4 << vece;
7377
7378 tcg_gen_shri_vec(vece, n, n, shr);
7379 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7380 tcg_gen_umin_vec(vece, d, n, t);
7381 tcg_temp_free_vec(t);
7382}
7383
7384static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
7385{
7386 static const TCGOpcode vec_list[] = {
7387 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7388 };
7389 static const GVecGen2i ops[3] = {
7390 { .fniv = gen_uqshrnb_vec,
7391 .opt_opc = vec_list,
7392 .fno = gen_helper_sve2_uqshrnb_h,
7393 .vece = MO_16 },
7394 { .fniv = gen_uqshrnb_vec,
7395 .opt_opc = vec_list,
7396 .fno = gen_helper_sve2_uqshrnb_s,
7397 .vece = MO_32 },
7398 { .fniv = gen_uqshrnb_vec,
7399 .opt_opc = vec_list,
7400 .fno = gen_helper_sve2_uqshrnb_d,
7401 .vece = MO_64 },
7402 };
7403 return do_sve2_shr_narrow(s, a, ops);
7404}
7405
7406static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
7407 TCGv_vec n, int64_t shr)
7408{
7409 TCGv_vec t = tcg_temp_new_vec_matching(d);
7410 int halfbits = 4 << vece;
7411
7412 tcg_gen_shri_vec(vece, n, n, shr);
7413 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7414 tcg_gen_umin_vec(vece, n, n, t);
7415 tcg_gen_shli_vec(vece, n, n, halfbits);
7416 tcg_gen_bitsel_vec(vece, d, t, d, n);
7417 tcg_temp_free_vec(t);
7418}
7419
7420static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
7421{
7422 static const TCGOpcode vec_list[] = {
7423 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7424 };
7425 static const GVecGen2i ops[3] = {
7426 { .fniv = gen_uqshrnt_vec,
7427 .opt_opc = vec_list,
7428 .load_dest = true,
7429 .fno = gen_helper_sve2_uqshrnt_h,
7430 .vece = MO_16 },
7431 { .fniv = gen_uqshrnt_vec,
7432 .opt_opc = vec_list,
7433 .load_dest = true,
7434 .fno = gen_helper_sve2_uqshrnt_s,
7435 .vece = MO_32 },
7436 { .fniv = gen_uqshrnt_vec,
7437 .opt_opc = vec_list,
7438 .load_dest = true,
7439 .fno = gen_helper_sve2_uqshrnt_d,
7440 .vece = MO_64 },
7441 };
7442 return do_sve2_shr_narrow(s, a, ops);
7443}
7444
7445static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
7446{
7447 static const GVecGen2i ops[3] = {
7448 { .fno = gen_helper_sve2_uqrshrnb_h },
7449 { .fno = gen_helper_sve2_uqrshrnb_s },
7450 { .fno = gen_helper_sve2_uqrshrnb_d },
7451 };
7452 return do_sve2_shr_narrow(s, a, ops);
7453}
7454
7455static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
7456{
7457 static const GVecGen2i ops[3] = {
7458 { .fno = gen_helper_sve2_uqrshrnt_h },
7459 { .fno = gen_helper_sve2_uqrshrnt_s },
7460 { .fno = gen_helper_sve2_uqrshrnt_d },
7461 };
7462 return do_sve2_shr_narrow(s, a, ops);
7463}
7464
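/*
 * Narrowing add/sub high instructions.  Each expansion of the macro
 * produces one trans_ function; the helper is chosen by a->esz, with
 * the byte entry NULL for the reserved size encoding.
 */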
7465#define DO_SVE2_ZZZ_NARROW(NAME, name) \
7466static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
7467{ \
7468 static gen_helper_gvec_3 * const fns[4] = { \
7469 NULL, gen_helper_sve2_##name##_h, \
7470 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
7471 }; \
7472 return do_sve2_zzz_ool(s, a, fns[a->esz]); \
7473}
7474
7475DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
7476DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
7477DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
7478DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
7479
7480DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
7481DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
7482DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
7483DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
7484
7485static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
7486 gen_helper_gvec_flags_4 *fn)
7487{
7488 if (!dc_isar_feature(aa64_sve2, s)) {
7489 return false;
7490 }
7491 return do_ppzz_flags(s, a, fn);
7492}
7493
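/*
 * MATCH and NMATCH are defined only for byte and halfword elements;
 * the word and doubleword entries are NULL.
 */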
7494#define DO_SVE2_PPZZ_MATCH(NAME, name) \
7495static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
7496{ \
7497 static gen_helper_gvec_flags_4 * const fns[4] = { \
7498 gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h, \
7499 NULL, NULL \
7500 }; \
7501 return do_sve2_ppzz_flags(s, a, fns[a->esz]); \
7502}
7503
7504DO_SVE2_PPZZ_MATCH(MATCH, match)
7505DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
7506
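/* HISTCNT is defined only for word and doubleword elements. */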
7507static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
7508{
7509 static gen_helper_gvec_4 * const fns[2] = {
7510 gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
7511 };
7512 if (a->esz < 2) {
7513 return false;
7514 }
7515 return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
7516}
7517
7518static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
7519{
7520 if (a->esz != 0) {
7521 return false;
7522 }
7523 return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
7524}
7525
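/*
 * SVE2 floating-point pairwise operations (predicated).  Half, single
 * and double precision only; the NULL entry rejects byte encodings.
 */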
7526static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
7527 gen_helper_gvec_4_ptr *fn)
7528{
7529 if (!dc_isar_feature(aa64_sve2, s)) {
7530 return false;
7531 }
7532 return do_zpzz_fp(s, a, fn);
7533}
7534
7535#define DO_SVE2_ZPZZ_FP(NAME, name) \
7536static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
7537{ \
7538 static gen_helper_gvec_4_ptr * const fns[4] = { \
7539 NULL, gen_helper_sve2_##name##_zpzz_h, \
7540 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d \
7541 }; \
7542 return do_sve2_zpzz_fp(s, a, fns[a->esz]); \
7543}
7544
7545DO_SVE2_ZPZZ_FP(FADDP, faddp)
7546DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
7547DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
7548DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
7549DO_SVE2_ZPZZ_FP(FMINP, fminp)
7550
7551/*
7552 * SVE Integer Multiply-Add (unpredicated)
7553 */
7554
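/*
 * For the saturating doubling widening multiply-add/sub, sel1 and sel2
 * choose the bottom (0) or top (1) halves of the two multiplicands;
 * they are packed as (sel2 << 1) | sel1 into the data value passed to
 * the out-of-line helper.
 */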
7555static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
7556 bool sel1, bool sel2)
7557{
7558 static gen_helper_gvec_4 * const fns[] = {
7559 NULL, gen_helper_sve2_sqdmlal_zzzw_h,
7560 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
7561 };
7562 return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
7563}
7564
7565static bool do_sqdmlsl_zzzw(DisasContext *s, arg_rrrr_esz *a,
7566 bool sel1, bool sel2)
7567{
7568 static gen_helper_gvec_4 * const fns[] = {
7569 NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
7570 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
7571 };
7572 return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
7573}
7574
7575static bool trans_SQDMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7576{
7577 return do_sqdmlal_zzzw(s, a, false, false);
7578}
7579
7580static bool trans_SQDMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7581{
7582 return do_sqdmlal_zzzw(s, a, true, true);
7583}
7584
7585static bool trans_SQDMLALBT(DisasContext *s, arg_rrrr_esz *a)
7586{
7587 return do_sqdmlal_zzzw(s, a, false, true);
7588}
7589
7590static bool trans_SQDMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7591{
7592 return do_sqdmlsl_zzzw(s, a, false, false);
7593}
7594
7595static bool trans_SQDMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7596{
7597 return do_sqdmlsl_zzzw(s, a, true, true);
7598}
7599
7600static bool trans_SQDMLSLBT(DisasContext *s, arg_rrrr_esz *a)
7601{
7602 return do_sqdmlsl_zzzw(s, a, false, true);
7603}
7604
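/* SQRDMLAH and SQRDMLSH accept all four element sizes. */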
7605static bool trans_SQRDMLAH_zzzz(DisasContext *s, arg_rrrr_esz *a)
7606{
7607 static gen_helper_gvec_4 * const fns[] = {
7608 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
7609 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
7610 };
7611 return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
7612}
7613
7614static bool trans_SQRDMLSH_zzzz(DisasContext *s, arg_rrrr_esz *a)
7615{
7616 static gen_helper_gvec_4 * const fns[] = {
7617 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
7618 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
7619 };
7620 return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
7621}
7622
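/*
 * Widening multiply-accumulate: a single 'sel' bit, passed through as
 * the helper's data argument, selects the bottom (false) or top (true)
 * halves of the narrow source elements.
 */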
7623static bool do_smlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7624{
7625 static gen_helper_gvec_4 * const fns[] = {
7626 NULL, gen_helper_sve2_smlal_zzzw_h,
7627 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
7628 };
7629 return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
7630}
7631
7632static bool trans_SMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7633{
7634 return do_smlal_zzzw(s, a, false);
7635}
7636
7637static bool trans_SMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7638{
7639 return do_smlal_zzzw(s, a, true);
7640}
7641
7642static bool do_umlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7643{
7644 static gen_helper_gvec_4 * const fns[] = {
7645 NULL, gen_helper_sve2_umlal_zzzw_h,
7646 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
7647 };
7648 return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
7649}
7650
7651static bool trans_UMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7652{
7653 return do_umlal_zzzw(s, a, false);
7654}
7655
7656static bool trans_UMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7657{
7658 return do_umlal_zzzw(s, a, true);
7659}
7660
7661static bool do_smlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7662{
7663 static gen_helper_gvec_4 * const fns[] = {
7664 NULL, gen_helper_sve2_smlsl_zzzw_h,
7665 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
7666 };
7667 return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
7668}
7669
7670static bool trans_SMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7671{
7672 return do_smlsl_zzzw(s, a, false);
7673}
7674
7675static bool trans_SMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7676{
7677 return do_smlsl_zzzw(s, a, true);
7678}
7679
7680static bool do_umlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7681{
7682 static gen_helper_gvec_4 * const fns[] = {
7683 NULL, gen_helper_sve2_umlsl_zzzw_h,
7684 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
7685 };
7686 return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
7687}
7688
7689static bool trans_UMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7690{
7691 return do_umlsl_zzzw(s, a, false);
7692}
7693
7694static bool trans_UMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7695{
7696 return do_umlsl_zzzw(s, a, true);
7697}
7698
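/*
 * Complex integer multiply-add: the rotation field (a->rot) is passed
 * as the helper's data argument; all element sizes are valid.
 */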
7699static bool trans_CMLA_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
7700{
7701 static gen_helper_gvec_4 * const fns[] = {
7702 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
7703 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
7704 };
7705
7706 if (!dc_isar_feature(aa64_sve2, s)) {
7707 return false;
7708 }
7709 if (sve_access_check(s)) {
7710 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
7711 }
7712 return true;
7713}
7714
7715static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_SQRDCMLAH_zzzz *a)
7716{
7717 static gen_helper_gvec_4 * const fns[] = {
7718 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
7719 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
7720 };
7721
7722 if (!dc_isar_feature(aa64_sve2, s)) {
7723 return false;
7724 }
7725 if (sve_access_check(s)) {
7726 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
7727 }
7728 return true;
7729}