]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Use TRANS_FEAT for gen_gvec_fn_arg_zzz
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
49/*
50 * Helpers for extracting complex instruction fields.
51 */
52
53/* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
451e4ffd 56static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
57{
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60}
61
451e4ffd 62static int tszimm_shr(DisasContext *s, int x)
ccd841c3 63{
451e4ffd 64 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
65}
66
67/* See e.g. LSL (immediate, predicated). */
451e4ffd 68static int tszimm_shl(DisasContext *s, int x)
ccd841c3 69{
451e4ffd 70 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
71}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
c4e7c493
RH
84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
86 */
451e4ffd 87static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
88{
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
91}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
103/* Return the offset info CPUARMState of the predicate vector register Pn.
104 * Note for this purpose, FFR is P16.
105 */
106static inline int pred_full_reg_offset(DisasContext *s, int regno)
107{
108 return offsetof(CPUARMState, vfp.pregs[regno]);
109}
110
111/* Return the byte size of the whole predicate register, VL / 64. */
112static inline int pred_full_reg_size(DisasContext *s)
113{
114 return s->sve_len >> 3;
115}
116
516e246a
RH
117/* Round up the size of a register to a size allowed by
118 * the tcg vector infrastructure. Any operation which uses this
119 * size may assume that the bits above pred_full_reg_size are zero,
120 * and must leave them the same way.
121 *
122 * Note that this is not needed for the vector registers as they
123 * are always properly sized for tcg vectors.
124 */
125static int size_for_gvec(int size)
126{
127 if (size <= 8) {
128 return 8;
129 } else {
130 return QEMU_ALIGN_UP(size, 16);
131 }
132}
133
134static int pred_gvec_reg_size(DisasContext *s)
135{
136 return size_for_gvec(pred_full_reg_size(s));
137}
138
40e32e5a 139/* Invoke an out-of-line helper on 2 Zregs. */
c5edf07d 140static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
40e32e5a
RH
141 int rd, int rn, int data)
142{
c5edf07d
RH
143 if (fn == NULL) {
144 return false;
145 }
146 if (sve_access_check(s)) {
147 unsigned vsz = vec_full_reg_size(s);
148 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
149 vec_full_reg_offset(s, rn),
150 vsz, vsz, data, fn);
151 }
152 return true;
40e32e5a
RH
153}
154
e645d1a1 155/* Invoke an out-of-line helper on 3 Zregs. */
913a8a00 156static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
e645d1a1
RH
157 int rd, int rn, int rm, int data)
158{
913a8a00
RH
159 if (fn == NULL) {
160 return false;
161 }
162 if (sve_access_check(s)) {
163 unsigned vsz = vec_full_reg_size(s);
164 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
165 vec_full_reg_offset(s, rn),
166 vec_full_reg_offset(s, rm),
167 vsz, vsz, data, fn);
168 }
169 return true;
e645d1a1
RH
170}
171
84a272f5
RH
172static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
173 arg_rrr_esz *a, int data)
174{
175 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
176}
177
38650638 178/* Invoke an out-of-line helper on 4 Zregs. */
7ad416b1 179static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
38650638
RH
180 int rd, int rn, int rm, int ra, int data)
181{
7ad416b1
RH
182 if (fn == NULL) {
183 return false;
184 }
185 if (sve_access_check(s)) {
186 unsigned vsz = vec_full_reg_size(s);
187 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
188 vec_full_reg_offset(s, rn),
189 vec_full_reg_offset(s, rm),
190 vec_full_reg_offset(s, ra),
191 vsz, vsz, data, fn);
192 }
193 return true;
38650638
RH
194}
195
cab79ac9
RH
196static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
197 arg_rrrr_esz *a, int data)
198{
199 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
200}
201
e82d3536
RH
202static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
203 arg_rrxr_esz *a)
204{
205 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
206}
207
96a461f7 208/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
8fb27a21 209static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
96a461f7
RH
210 int rd, int rn, int pg, int data)
211{
8fb27a21
RH
212 if (fn == NULL) {
213 return false;
214 }
215 if (sve_access_check(s)) {
216 unsigned vsz = vec_full_reg_size(s);
217 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
218 vec_full_reg_offset(s, rn),
219 pred_full_reg_offset(s, pg),
220 vsz, vsz, data, fn);
221 }
222 return true;
96a461f7
RH
223}
224
b051809a
RH
225static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
226 arg_rpr_esz *a, int data)
227{
228 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
229}
230
afa2529c
RH
231static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
232 arg_rpri_esz *a)
233{
234 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
235}
b051809a 236
36cbb7a8 237/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
2a753d1e 238static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
36cbb7a8
RH
239 int rd, int rn, int rm, int pg, int data)
240{
2a753d1e
RH
241 if (fn == NULL) {
242 return false;
243 }
244 if (sve_access_check(s)) {
245 unsigned vsz = vec_full_reg_size(s);
246 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
247 vec_full_reg_offset(s, rn),
248 vec_full_reg_offset(s, rm),
249 pred_full_reg_offset(s, pg),
250 vsz, vsz, data, fn);
251 }
252 return true;
36cbb7a8 253}
f7d79c41 254
312016c9
RH
255static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
256 arg_rprr_esz *a, int data)
257{
258 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
259}
260
39eea561 261/* Invoke a vector expander on three Zregs. */
50f6db5f 262static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
28c4da31 263 int esz, int rd, int rn, int rm)
38388f7e 264{
50f6db5f
RH
265 if (gvec_fn == NULL) {
266 return false;
267 }
268 if (sve_access_check(s)) {
269 unsigned vsz = vec_full_reg_size(s);
270 gvec_fn(esz, vec_full_reg_offset(s, rd),
271 vec_full_reg_offset(s, rn),
272 vec_full_reg_offset(s, rm), vsz, vsz);
273 }
274 return true;
38388f7e
RH
275}
276
cd54bbe6
RH
277static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
278 arg_rrr_esz *a)
279{
280 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
281}
282
911cdc6d
RH
283/* Invoke a vector expander on four Zregs. */
284static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
285 int esz, int rd, int rn, int rm, int ra)
286{
287 unsigned vsz = vec_full_reg_size(s);
288 gvec_fn(esz, vec_full_reg_offset(s, rd),
289 vec_full_reg_offset(s, rn),
290 vec_full_reg_offset(s, rm),
291 vec_full_reg_offset(s, ra), vsz, vsz);
292}
293
39eea561
RH
294/* Invoke a vector move on two Zregs. */
295static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 296{
f7d79c41 297 if (sve_access_check(s)) {
5f730621
RH
298 unsigned vsz = vec_full_reg_size(s);
299 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
300 vec_full_reg_offset(s, rn), vsz, vsz);
f7d79c41
RH
301 }
302 return true;
38388f7e
RH
303}
304
d9d78dcc
RH
305/* Initialize a Zreg with replications of a 64-bit immediate. */
306static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
307{
308 unsigned vsz = vec_full_reg_size(s);
8711e71f 309 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
d9d78dcc
RH
310}
311
516e246a 312/* Invoke a vector expander on three Pregs. */
dd81a8d7
RH
313static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
314 int rd, int rn, int rm)
516e246a 315{
dd81a8d7
RH
316 unsigned psz = pred_gvec_reg_size(s);
317 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
318 pred_full_reg_offset(s, rn),
319 pred_full_reg_offset(s, rm), psz, psz);
516e246a
RH
320}
321
322/* Invoke a vector move on two Pregs. */
323static bool do_mov_p(DisasContext *s, int rd, int rn)
324{
d0b2df5a
RH
325 if (sve_access_check(s)) {
326 unsigned psz = pred_gvec_reg_size(s);
327 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
328 pred_full_reg_offset(s, rn), psz, psz);
329 }
330 return true;
516e246a
RH
331}
332
9e18d7a6
RH
333/* Set the cpu flags as per a return from an SVE helper. */
334static void do_pred_flags(TCGv_i32 t)
335{
336 tcg_gen_mov_i32(cpu_NF, t);
337 tcg_gen_andi_i32(cpu_ZF, t, 2);
338 tcg_gen_andi_i32(cpu_CF, t, 1);
339 tcg_gen_movi_i32(cpu_VF, 0);
340}
341
342/* Subroutines computing the ARM PredTest pseudofunction. */
343static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
344{
345 TCGv_i32 t = tcg_temp_new_i32();
346
347 gen_helper_sve_predtest1(t, d, g);
348 do_pred_flags(t);
349 tcg_temp_free_i32(t);
350}
351
352static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
353{
354 TCGv_ptr dptr = tcg_temp_new_ptr();
355 TCGv_ptr gptr = tcg_temp_new_ptr();
392acacc 356 TCGv_i32 t = tcg_temp_new_i32();
9e18d7a6
RH
357
358 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
359 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
9e18d7a6 360
392acacc 361 gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
9e18d7a6
RH
362 tcg_temp_free_ptr(dptr);
363 tcg_temp_free_ptr(gptr);
364
365 do_pred_flags(t);
366 tcg_temp_free_i32(t);
367}
368
028e2a7b
RH
369/* For each element size, the bits within a predicate word that are active. */
370const uint64_t pred_esz_masks[4] = {
371 0xffffffffffffffffull, 0x5555555555555555ull,
372 0x1111111111111111ull, 0x0101010101010101ull
373};
374
39eea561
RH
375/*
376 *** SVE Logical - Unpredicated Group
377 */
378
b262215b
RH
379TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
380TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
381TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
382TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
d1822297 383
e6eba6e5
RH
384static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
385{
386 TCGv_i64 t = tcg_temp_new_i64();
387 uint64_t mask = dup_const(MO_8, 0xff >> sh);
388
389 tcg_gen_xor_i64(t, n, m);
390 tcg_gen_shri_i64(d, t, sh);
391 tcg_gen_shli_i64(t, t, 8 - sh);
392 tcg_gen_andi_i64(d, d, mask);
393 tcg_gen_andi_i64(t, t, ~mask);
394 tcg_gen_or_i64(d, d, t);
395 tcg_temp_free_i64(t);
396}
397
398static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
399{
400 TCGv_i64 t = tcg_temp_new_i64();
401 uint64_t mask = dup_const(MO_16, 0xffff >> sh);
402
403 tcg_gen_xor_i64(t, n, m);
404 tcg_gen_shri_i64(d, t, sh);
405 tcg_gen_shli_i64(t, t, 16 - sh);
406 tcg_gen_andi_i64(d, d, mask);
407 tcg_gen_andi_i64(t, t, ~mask);
408 tcg_gen_or_i64(d, d, t);
409 tcg_temp_free_i64(t);
410}
411
412static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
413{
414 tcg_gen_xor_i32(d, n, m);
415 tcg_gen_rotri_i32(d, d, sh);
416}
417
418static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
419{
420 tcg_gen_xor_i64(d, n, m);
421 tcg_gen_rotri_i64(d, d, sh);
422}
423
424static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
425 TCGv_vec m, int64_t sh)
426{
427 tcg_gen_xor_vec(vece, d, n, m);
428 tcg_gen_rotri_vec(vece, d, d, sh);
429}
430
431void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
432 uint32_t rm_ofs, int64_t shift,
433 uint32_t opr_sz, uint32_t max_sz)
434{
435 static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
436 static const GVecGen3i ops[4] = {
437 { .fni8 = gen_xar8_i64,
438 .fniv = gen_xar_vec,
439 .fno = gen_helper_sve2_xar_b,
440 .opt_opc = vecop,
441 .vece = MO_8 },
442 { .fni8 = gen_xar16_i64,
443 .fniv = gen_xar_vec,
444 .fno = gen_helper_sve2_xar_h,
445 .opt_opc = vecop,
446 .vece = MO_16 },
447 { .fni4 = gen_xar_i32,
448 .fniv = gen_xar_vec,
449 .fno = gen_helper_sve2_xar_s,
450 .opt_opc = vecop,
451 .vece = MO_32 },
452 { .fni8 = gen_xar_i64,
453 .fniv = gen_xar_vec,
454 .fno = gen_helper_gvec_xar_d,
455 .opt_opc = vecop,
456 .vece = MO_64 }
457 };
458 int esize = 8 << vece;
459
460 /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
461 tcg_debug_assert(shift >= 0);
462 tcg_debug_assert(shift <= esize);
463 shift &= esize - 1;
464
465 if (shift == 0) {
466 /* xar with no rotate devolves to xor. */
467 tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
468 } else {
469 tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
470 shift, &ops[vece]);
471 }
472}
473
474static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
475{
476 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
477 return false;
478 }
479 if (sve_access_check(s)) {
480 unsigned vsz = vec_full_reg_size(s);
481 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
482 vec_full_reg_offset(s, a->rn),
483 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
484 }
485 return true;
486}
487
911cdc6d
RH
488static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
489{
490 if (!dc_isar_feature(aa64_sve2, s)) {
491 return false;
492 }
493 if (sve_access_check(s)) {
494 gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
495 }
496 return true;
497}
498
499static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
500{
501 tcg_gen_xor_i64(d, n, m);
502 tcg_gen_xor_i64(d, d, k);
503}
504
505static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
506 TCGv_vec m, TCGv_vec k)
507{
508 tcg_gen_xor_vec(vece, d, n, m);
509 tcg_gen_xor_vec(vece, d, d, k);
510}
511
512static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
513 uint32_t a, uint32_t oprsz, uint32_t maxsz)
514{
515 static const GVecGen4 op = {
516 .fni8 = gen_eor3_i64,
517 .fniv = gen_eor3_vec,
518 .fno = gen_helper_sve2_eor3,
519 .vece = MO_64,
520 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
521 };
522 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
523}
524
525static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
526{
527 return do_sve2_zzzz_fn(s, a, gen_eor3);
528}
529
530static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
531{
532 tcg_gen_andc_i64(d, m, k);
533 tcg_gen_xor_i64(d, d, n);
534}
535
536static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
537 TCGv_vec m, TCGv_vec k)
538{
539 tcg_gen_andc_vec(vece, d, m, k);
540 tcg_gen_xor_vec(vece, d, d, n);
541}
542
543static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
544 uint32_t a, uint32_t oprsz, uint32_t maxsz)
545{
546 static const GVecGen4 op = {
547 .fni8 = gen_bcax_i64,
548 .fniv = gen_bcax_vec,
549 .fno = gen_helper_sve2_bcax,
550 .vece = MO_64,
551 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
552 };
553 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
554}
555
556static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
557{
558 return do_sve2_zzzz_fn(s, a, gen_bcax);
559}
560
561static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
562 uint32_t a, uint32_t oprsz, uint32_t maxsz)
563{
564 /* BSL differs from the generic bitsel in argument ordering. */
565 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
566}
567
568static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
569{
570 return do_sve2_zzzz_fn(s, a, gen_bsl);
571}
572
573static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
574{
575 tcg_gen_andc_i64(n, k, n);
576 tcg_gen_andc_i64(m, m, k);
577 tcg_gen_or_i64(d, n, m);
578}
579
580static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
581 TCGv_vec m, TCGv_vec k)
582{
583 if (TCG_TARGET_HAS_bitsel_vec) {
584 tcg_gen_not_vec(vece, n, n);
585 tcg_gen_bitsel_vec(vece, d, k, n, m);
586 } else {
587 tcg_gen_andc_vec(vece, n, k, n);
588 tcg_gen_andc_vec(vece, m, m, k);
589 tcg_gen_or_vec(vece, d, n, m);
590 }
591}
592
593static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
594 uint32_t a, uint32_t oprsz, uint32_t maxsz)
595{
596 static const GVecGen4 op = {
597 .fni8 = gen_bsl1n_i64,
598 .fniv = gen_bsl1n_vec,
599 .fno = gen_helper_sve2_bsl1n,
600 .vece = MO_64,
601 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
602 };
603 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
604}
605
606static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
607{
608 return do_sve2_zzzz_fn(s, a, gen_bsl1n);
609}
610
611static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
612{
613 /*
614 * Z[dn] = (n & k) | (~m & ~k)
615 * = | ~(m | k)
616 */
617 tcg_gen_and_i64(n, n, k);
618 if (TCG_TARGET_HAS_orc_i64) {
619 tcg_gen_or_i64(m, m, k);
620 tcg_gen_orc_i64(d, n, m);
621 } else {
622 tcg_gen_nor_i64(m, m, k);
623 tcg_gen_or_i64(d, n, m);
624 }
625}
626
627static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
628 TCGv_vec m, TCGv_vec k)
629{
630 if (TCG_TARGET_HAS_bitsel_vec) {
631 tcg_gen_not_vec(vece, m, m);
632 tcg_gen_bitsel_vec(vece, d, k, n, m);
633 } else {
634 tcg_gen_and_vec(vece, n, n, k);
635 tcg_gen_or_vec(vece, m, m, k);
636 tcg_gen_orc_vec(vece, d, n, m);
637 }
638}
639
640static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
641 uint32_t a, uint32_t oprsz, uint32_t maxsz)
642{
643 static const GVecGen4 op = {
644 .fni8 = gen_bsl2n_i64,
645 .fniv = gen_bsl2n_vec,
646 .fno = gen_helper_sve2_bsl2n,
647 .vece = MO_64,
648 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
649 };
650 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
651}
652
653static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
654{
655 return do_sve2_zzzz_fn(s, a, gen_bsl2n);
656}
657
658static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
659{
660 tcg_gen_and_i64(n, n, k);
661 tcg_gen_andc_i64(m, m, k);
662 tcg_gen_nor_i64(d, n, m);
663}
664
665static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
666 TCGv_vec m, TCGv_vec k)
667{
668 tcg_gen_bitsel_vec(vece, d, k, n, m);
669 tcg_gen_not_vec(vece, d, d);
670}
671
672static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
673 uint32_t a, uint32_t oprsz, uint32_t maxsz)
674{
675 static const GVecGen4 op = {
676 .fni8 = gen_nbsl_i64,
677 .fniv = gen_nbsl_vec,
678 .fno = gen_helper_sve2_nbsl,
679 .vece = MO_64,
680 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
681 };
682 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
683}
684
685static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
686{
687 return do_sve2_zzzz_fn(s, a, gen_nbsl);
688}
689
fea98f9c
RH
690/*
691 *** SVE Integer Arithmetic - Unpredicated Group
692 */
693
b262215b
RH
694TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
695TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
696TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
697TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
698TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
699TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
fea98f9c 700
f97cfd59
RH
701/*
702 *** SVE Integer Arithmetic - Binary Predicated Group
703 */
704
a2103582
RH
705/* Select active elements from Zn and inactive elements from Zm,
706 * storing the result in Zd.
707 */
708static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
709{
710 static gen_helper_gvec_4 * const fns[4] = {
711 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
712 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
713 };
36cbb7a8 714 gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
a2103582
RH
715}
716
8e7acb24
RH
717#define DO_ZPZZ(NAME, FEAT, name) \
718 static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \
719 gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \
720 gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \
f97cfd59 721 }; \
8e7acb24
RH
722 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \
723 name##_zpzz_fns[a->esz], a, 0)
724
725DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
726DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
727DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
728DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)
729
730DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
731DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)
732
733DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
734DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
735DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
736DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
737DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
738DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)
739
740DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
741DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
742DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)
743
744DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
745DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
746DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)
747
748static gen_helper_gvec_4 * const sdiv_fns[4] = {
749 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
750};
751TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)
f97cfd59 752
8e7acb24
RH
753static gen_helper_gvec_4 * const udiv_fns[4] = {
754 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
755};
756TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)
f97cfd59 757
3a7be554 758static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
a2103582
RH
759{
760 if (sve_access_check(s)) {
761 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
762 }
763 return true;
764}
d3fe4a29 765
afac6d04
RH
766/*
767 *** SVE Integer Arithmetic - Unary Predicated Group
768 */
769
817bd5c9
RH
770#define DO_ZPZ(NAME, FEAT, name) \
771 static gen_helper_gvec_3 * const name##_fns[4] = { \
772 gen_helper_##name##_b, gen_helper_##name##_h, \
773 gen_helper_##name##_s, gen_helper_##name##_d, \
afac6d04 774 }; \
817bd5c9
RH
775 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)
776
777DO_ZPZ(CLS, aa64_sve, sve_cls)
778DO_ZPZ(CLZ, aa64_sve, sve_clz)
779DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
780DO_ZPZ(CNOT, aa64_sve, sve_cnot)
781DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
782DO_ZPZ(ABS, aa64_sve, sve_abs)
783DO_ZPZ(NEG, aa64_sve, sve_neg)
784DO_ZPZ(RBIT, aa64_sve, sve_rbit)
785
786static gen_helper_gvec_3 * const fabs_fns[4] = {
787 NULL, gen_helper_sve_fabs_h,
788 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
789};
790TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
afac6d04 791
817bd5c9
RH
792static gen_helper_gvec_3 * const fneg_fns[4] = {
793 NULL, gen_helper_sve_fneg_h,
794 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
795};
796TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
afac6d04 797
817bd5c9
RH
798static gen_helper_gvec_3 * const sxtb_fns[4] = {
799 NULL, gen_helper_sve_sxtb_h,
800 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
801};
802TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)
afac6d04 803
817bd5c9
RH
804static gen_helper_gvec_3 * const uxtb_fns[4] = {
805 NULL, gen_helper_sve_uxtb_h,
806 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
807};
808TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)
afac6d04 809
817bd5c9
RH
810static gen_helper_gvec_3 * const sxth_fns[4] = {
811 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
812};
813TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)
afac6d04 814
817bd5c9
RH
815static gen_helper_gvec_3 * const uxth_fns[4] = {
816 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
817};
818TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)
afac6d04 819
817bd5c9
RH
820TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
821 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
822TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
823 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
afac6d04 824
047cec97
RH
825/*
826 *** SVE Integer Reduction Group
827 */
828
829typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
830static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
831 gen_helper_gvec_reduc *fn)
832{
833 unsigned vsz = vec_full_reg_size(s);
834 TCGv_ptr t_zn, t_pg;
835 TCGv_i32 desc;
836 TCGv_i64 temp;
837
838 if (fn == NULL) {
839 return false;
840 }
841 if (!sve_access_check(s)) {
842 return true;
843 }
844
c6a59b55 845 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
047cec97
RH
846 temp = tcg_temp_new_i64();
847 t_zn = tcg_temp_new_ptr();
848 t_pg = tcg_temp_new_ptr();
849
850 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
851 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
852 fn(temp, t_zn, t_pg, desc);
853 tcg_temp_free_ptr(t_zn);
854 tcg_temp_free_ptr(t_pg);
047cec97
RH
855
856 write_fp_dreg(s, a->rd, temp);
857 tcg_temp_free_i64(temp);
858 return true;
859}
860
861#define DO_VPZ(NAME, name) \
3a7be554 862static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
047cec97
RH
863{ \
864 static gen_helper_gvec_reduc * const fns[4] = { \
865 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
866 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
867 }; \
868 return do_vpz_ool(s, a, fns[a->esz]); \
869}
870
871DO_VPZ(ORV, orv)
872DO_VPZ(ANDV, andv)
873DO_VPZ(EORV, eorv)
874
875DO_VPZ(UADDV, uaddv)
876DO_VPZ(SMAXV, smaxv)
877DO_VPZ(UMAXV, umaxv)
878DO_VPZ(SMINV, sminv)
879DO_VPZ(UMINV, uminv)
880
3a7be554 881static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
047cec97
RH
882{
883 static gen_helper_gvec_reduc * const fns[4] = {
884 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
885 gen_helper_sve_saddv_s, NULL
886 };
887 return do_vpz_ool(s, a, fns[a->esz]);
888}
889
890#undef DO_VPZ
891
ccd841c3
RH
892/*
893 *** SVE Shift by Immediate - Predicated Group
894 */
895
60245996
RH
896/*
897 * Copy Zn into Zd, storing zeros into inactive elements.
898 * If invert, store zeros into the active elements.
ccd841c3 899 */
60245996
RH
900static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
901 int esz, bool invert)
ccd841c3 902{
60245996
RH
903 static gen_helper_gvec_3 * const fns[4] = {
904 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
905 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
ccd841c3 906 };
8fb27a21 907 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
ccd841c3
RH
908}
909
3a7be554 910static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
911{
912 static gen_helper_gvec_3 * const fns[4] = {
913 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
914 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
915 };
916 if (a->esz < 0) {
917 /* Invalid tsz encoding -- see tszimm_esz. */
918 return false;
919 }
920 /* Shift by element size is architecturally valid. For
921 arithmetic right-shift, it's the same as by one less. */
922 a->imm = MIN(a->imm, (8 << a->esz) - 1);
afa2529c 923 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
924}
925
3a7be554 926static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
927{
928 static gen_helper_gvec_3 * const fns[4] = {
929 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
930 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
931 };
932 if (a->esz < 0) {
933 return false;
934 }
935 /* Shift by element size is architecturally valid.
936 For logical shifts, it is a zeroing operation. */
937 if (a->imm >= (8 << a->esz)) {
60245996 938 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3 939 } else {
afa2529c 940 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
941 }
942}
943
3a7be554 944static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
945{
946 static gen_helper_gvec_3 * const fns[4] = {
947 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
948 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
949 };
950 if (a->esz < 0) {
951 return false;
952 }
953 /* Shift by element size is architecturally valid.
954 For logical shifts, it is a zeroing operation. */
955 if (a->imm >= (8 << a->esz)) {
60245996 956 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3 957 } else {
afa2529c 958 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
959 }
960}
961
3a7be554 962static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
963{
964 static gen_helper_gvec_3 * const fns[4] = {
965 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
966 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
967 };
968 if (a->esz < 0) {
969 return false;
970 }
971 /* Shift by element size is architecturally valid. For arithmetic
972 right shift for division, it is a zeroing operation. */
973 if (a->imm >= (8 << a->esz)) {
60245996 974 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3 975 } else {
afa2529c 976 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
977 }
978}
979
/*
 * SVE2 saturating/rounding shifts by immediate, predicated.
 * Each table is indexed by element size; a->esz < 0 indicates an
 * invalid tszimm encoding and selects a NULL helper, which makes
 * the TRANS_FEAT expansion reject the instruction.
 */
static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
a5421b54 1014
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/*
 * Shift each element by the low bits of the corresponding 64-bit
 * "wide" element of the second vector, under a governing predicate.
 * There is no MO_64 helper (NULL entry), so esz == 3 is rejected.
 */
#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {             \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
        gen_helper_sve_##name##_zpzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,            \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1032
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

/*
 * Expand an unpredicated shift-by-immediate with an inline gvec
 * expander.  @asr selects the clamping rule applied when the
 * immediate equals the element width (see comment below).
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid. For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

/* ASR (immediate, unpredicated). */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

/* LSR (immediate, unpredicated). */
static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

/* LSL (immediate, unpredicated). */
static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
1078
/*
 * Unpredicated shift by the low bits of a 64-bit "wide" element
 * vector.  As with the predicated forms, there is no MO_64 helper;
 * the NULL table entry rejects esz == 3 via TRANS_FEAT.
 */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {              \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,   \
        gen_helper_sve_##name##_zzw_s, NULL                             \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                    \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1092
/*
 *** SVE Integer Multiply-Add Group
 */

/*
 * Expand a predicated four-register (rd, ra, rn, rm) operation via
 * an out-of-line helper taking five pointers plus a descriptor.
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* MLA/MLS: multiply-accumulate with per-element-size helpers. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
1126
/*
 *** SVE Index Generation Group
 */

/*
 * Expand INDEX into rd for element size @esz, given 64-bit start and
 * increment values.  The 64-bit case uses the operands directly;
 * smaller element sizes truncate both to 32 bits first, since the
 * b/h/s helpers take 32-bit arguments.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
}
1160
3a7be554 1161static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
9a56c9c3
RH
1162{
1163 if (sve_access_check(s)) {
b0c3aece
RH
1164 TCGv_i64 start = tcg_constant_i64(a->imm1);
1165 TCGv_i64 incr = tcg_constant_i64(a->imm2);
9a56c9c3 1166 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1167 }
1168 return true;
1169}
1170
3a7be554 1171static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
9a56c9c3
RH
1172{
1173 if (sve_access_check(s)) {
b0c3aece 1174 TCGv_i64 start = tcg_constant_i64(a->imm);
9a56c9c3
RH
1175 TCGv_i64 incr = cpu_reg(s, a->rm);
1176 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1177 }
1178 return true;
1179}
1180
3a7be554 1181static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
9a56c9c3
RH
1182{
1183 if (sve_access_check(s)) {
1184 TCGv_i64 start = cpu_reg(s, a->rn);
b0c3aece 1185 TCGv_i64 incr = tcg_constant_i64(a->imm);
9a56c9c3 1186 do_index(s, a->esz, a->rd, start, incr);
9a56c9c3
RH
1187 }
1188 return true;
1189}
1190
3a7be554 1191static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
9a56c9c3
RH
1192{
1193 if (sve_access_check(s)) {
1194 TCGv_i64 start = cpu_reg(s, a->rn);
1195 TCGv_i64 incr = cpu_reg(s, a->rm);
1196 do_index(s, a->esz, a->rd, start, incr);
1197 }
1198 return true;
1199}
1200
96f922cc
RH
1201/*
1202 *** SVE Stack Allocation Group
1203 */
1204
3a7be554 1205static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1206{
5de56742
AC
1207 if (sve_access_check(s)) {
1208 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1209 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1210 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1211 }
96f922cc
RH
1212 return true;
1213}
1214
3a7be554 1215static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1216{
5de56742
AC
1217 if (sve_access_check(s)) {
1218 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1219 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1220 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1221 }
96f922cc
RH
1222 return true;
1223}
1224
3a7be554 1225static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1226{
5de56742
AC
1227 if (sve_access_check(s)) {
1228 TCGv_i64 reg = cpu_reg(s, a->rd);
1229 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1230 }
96f922cc
RH
1231 return true;
1232}
1233
/*
 *** SVE Compute Vector Address Group
 */

/*
 * ADR: expand via an out-of-line three-register helper; a->imm is
 * the shift amount applied to the offset, passed in the descriptor.
 */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

/* ADR with packed 32-bit offsets, sign-extended. */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

/* ADR with packed 64-bit offsets. */
static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

/* ADR with unpacked signed 32-bit offsets in 64-bit elements. */
static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

/* ADR with unpacked unsigned 32-bit offsets in 64-bit elements. */
static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1262
/*
 *** SVE Integer Misc - Unpredicated Group
 */

/* FEXPA has no byte form: the NULL entry rejects esz == 0. */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)

/* FTSSEL likewise has no byte form. */
static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
a1f233f2 1279
/*
 *** SVE Predicate Logical Operations Group
 */

/*
 * Common expansion for the predicate logical operations, with
 * optional flag setting (a->s).  The non-flag-setting form is a
 * plain gvec_4 expansion; the flag-setting form additionally runs
 * PTEST-style flag generation over the result.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large. The computation
         * of the flags depends on the original contents of the guarding
         * predicate. If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1339
/* AND (predicates): pd = pn & pm & pg, one 64-bit chunk at a time. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Same operation using host vector operations. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /*
     * Without flag setting, look for register coincidences that let
     * the three-input AND collapse to a move or a two-input AND.
     */
    if (!a->s) {
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            /* pn & pn & pg: a move if pg is also pn, else pn & pg. */
            if (a->pg == a->rn) {
                do_mov_p(s, a->rd, a->rn);
            } else {
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            /* pg duplicates one operand: a single AND suffices. */
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
1380
/* BIC (predicates): pd = pn & ~pm & pg. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Same operation using host vector operations. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* When pg == pn and no flags are wanted, pn & ~pm & pn is just
       an unguarded ANDC of the two source predicates. */
    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}
1411
/* EOR (predicates): pd = (pn ^ pm) & pg. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Same operation using host vector operations. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1435
3a7be554 1436static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1437{
516e246a
RH
1438 if (a->s) {
1439 return false;
516e246a 1440 }
d4bc6232
RH
1441 if (sve_access_check(s)) {
1442 unsigned psz = pred_gvec_reg_size(s);
1443 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1444 pred_full_reg_offset(s, a->pg),
1445 pred_full_reg_offset(s, a->rn),
1446 pred_full_reg_offset(s, a->rm), psz, psz);
1447 }
1448 return true;
516e246a
RH
1449}
1450
/* ORR (predicates): pd = (pn | pm) & pg. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Same operation using host vector operations. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* pg == pn == pm without flags: (pn | pn) & pn is a move. */
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1478
/* ORN (predicates): pd = (pn | ~pm) & pg. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Same operation using host vector operations. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1502
/* NOR (predicates): pd = pg & ~(pn | pm). */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Same operation using host vector operations. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1526
/* NAND (predicates): pd = pg & ~(pn & pm). */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Same operation using host vector operations. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1550
/*
 *** SVE Predicate Misc Group
 */

/*
 * PTEST: set the condition flags from predicate Pn as governed by Pg.
 * A single-word predicate is handled inline via do_predtest1; larger
 * predicates go through the multi-word do_predtest expansion.
 */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1578
/*
 * See the ARM pseudocode DecodePredCount.  Map the 5-bit pattern
 * field to an element count for a vector of @fullsz bytes with
 * element size @esz.  Unallocated patterns yield zero elements.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {
        /* POW2: the largest power of two that fits. */
        return pow2floor(elements);
    }
    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8: the pattern value itself. */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256. */
        bound = 16u << (pattern - 0x9);
    } else {
        switch (pattern) {
        case 0x1d: /* MUL4 */
            return elements - elements % 4;
        case 0x1e: /* MUL3 */
            return elements - elements % 3;
        case 0x1f: /* ALL */
            return elements;
        default:   /* #uimm5 */
            return 0;
        }
    }
    /* Fixed VL patterns apply only when the vector is long enough. */
    return elements >= bound ? bound : 0;
}
1616
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        /* Mask off the bits of the final word beyond the set size. */
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* All stored words are identical: try a gvec dup, which
           requires the rounded sizes to cover the set size exactly. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Fall back to storing word by word: full words, then the
       partial last word, then zeros to the end of the register. */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1696
/* PTRUE, PTRUES: initialize a predicate from a count pattern. */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}
1713
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR (unpredicated): copy the FFR into Pd. */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

/* WRFFR: copy Pn into the FFR. */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1735
/*
 * Common expansion for PFIRST and PNEXT: call an out-of-line helper
 * with pointers to Pd and Pg and a descriptor holding the predicate
 * size and element size, then update the flags from its return value.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    /* The helper returns the flag bits; commit them to NZCV. */
    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1774
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtraction can only underflow; clamp from below. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Addition can only overflow; clamp from above. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1803
/* Similarly with 64-bit values.  Here overflow cannot be detected by
 * widening, so it is computed explicitly from the operand signs.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: select 0 when reg < val (borrow). */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: select ~0 when the sum wrapped below reg. */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1849
/* Similarly with a vector and a scalar operand.  The per-size helpers
 * implement only saturating addition, so a subtraction is performed
 * by negating the scalar (except for unsigned 64-bit, which has a
 * dedicated uqsubi helper).
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                /* Unsigned 64-bit subtract cannot negate the operand;
                   use the dedicated saturating-subtract helper. */
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
1933
3a7be554 1934static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
1935{
1936 if (sve_access_check(s)) {
1937 unsigned fullsz = vec_full_reg_size(s);
1938 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1939 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1940 }
1941 return true;
1942}
1943
3a7be554 1944static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1945{
1946 if (sve_access_check(s)) {
1947 unsigned fullsz = vec_full_reg_size(s);
1948 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1949 int inc = numelem * a->imm * (a->d ? -1 : 1);
1950 TCGv_i64 reg = cpu_reg(s, a->rd);
1951
1952 tcg_gen_addi_i64(reg, reg, inc);
1953 }
1954 return true;
1955}
1956
/*
 * SQINC/UQINC/SQDEC/UQDEC (scalar, 32-bit): saturating increment or
 * decrement of the low 32 bits of Xd by (elements * imm).
 */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        /* Nothing to add: only the 32-bit extension is required. */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}

/*
 * SQINC/UQINC/SQDEC/UQDEC (scalar, 64-bit): saturating increment or
 * decrement of Xd by (elements * imm).
 */
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
1997
/*
 * INC/DEC (vector): add or subtract (elements * imm) to each element.
 * There is no byte form; a->esz == 0 is rejected.
 */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        /* Zero increment: reduces to a register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 * SQINC/UQINC/SQDEC/UQDEC (vector): saturating per-element increment
 * or decrement by (elements * imm).  No byte form.
 */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        /* Zero increment: reduces to a register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
2041
/*
 *** SVE Bitwise Immediate Group
 */

/*
 * Expand a bitwise operation with a logical immediate.  The dbm field
 * is decoded with the shared A64 bitmask-immediate decoder; failure to
 * decode means an unallocated encoding.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}
2076
/* DUPM: broadcast a decoded logical immediate to every element. */
static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        /* Unallocated bitmask-immediate encoding. */
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
2090
f25a2361
RH
2091/*
2092 *** SVE Integer Wide Immediate - Predicated Group
2093 */
2094
/* Implement all merging copies. This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 *
 * Copies VAL into the active elements of Zd (selected by Pg),
 * preserving inactive elements from Zn, via a per-esz helper.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    /* Form env-relative pointers to the destination, source and
     * governing predicate registers. */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
}
2122
/* FCPY: merging copy of a VFP-encoded FP immediate; no byte-sized form. */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}
2135
/* CPY (immediate, merging).  Insn bit 13 selects a shifted immediate,
 * which is not valid for byte elements. */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    }
    return true;
}
2146
/* CPY (immediate, zeroing): active elements get the immediate, inactive
 * elements are zeroed.  Same shifted-immediate restriction as CPY_m_i. */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2166
b94f8f60
RH
2167/*
2168 *** SVE Permute Extract Group
2169 */
2170
/*
 * EXT: extract a vector from the byte-concatenation {Zm:Zn} starting at
 * byte offset imm.  An offset at or beyond the vector length selects
 * Zn unchanged (n_ofs forced to 0).
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Copy the tail of Zn, then (if any) the head of Zm. */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper. */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2200
75114792
SL
/* EXT (SVE1 encoding): three distinct source registers. */
static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
}

/* EXT (SVE2 constructive encoding): second source is the register
 * pair successor of Zn, wrapping modulo 32. */
static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
}
2213
30562ab7
RH
2214/*
2215 *** SVE Permute - Unpredicated Group
2216 */
2217
/* DUP (scalar): broadcast a general register (SP allowed) to all elements. */
static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}
2227
/*
 * DUP (indexed): broadcast element [index] of Zn.  The imm field encodes
 * both the element size (position of lowest set bit) and the index
 * (remaining upper bits); imm with no bit in [4:0] set is invalid.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            /* Index in range: splat the selected element. */
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2254
/*
 * INSR common path: shift Zn up by one element and insert VAL at
 * element 0, via a per-esz out-of-line helper.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
}
2275
/* INSR (SIMD&FP scalar): insert the low 64 bits of Vm at element 0. */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* INSR (scalar): insert general register Xm at element 0. */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
2294
0ea3cdbf
RH
/* REV (vector): reverse the order of all elements, per element size. */
static gen_helper_gvec_2 * const rev_fns[4] = {
    gen_helper_sve_rev_b, gen_helper_sve_rev_h,
    gen_helper_sve_rev_s, gen_helper_sve_rev_d
};
TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)

/* TBL: table lookup with a single table register. */
static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
    gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
    gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
};
TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)

/* TBL (SVE2): two-register table {Zn, Zn+1 mod 32}. */
static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
    gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
    gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
};
TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
           a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)

/* TBX (SVE2): table lookup, out-of-range indices leave Zd unchanged. */
static gen_helper_gvec_3 * const tbx_fns[4] = {
    gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
    gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
};
TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
80a712a2 2319
/*
 * SUNPKLO/SUNPKHI/UUNPKLO/UUNPKHI: widen the low (h==0) or high (h==1)
 * half of Zn by sign- (u==0) or zero- (u==1) extension.  No byte-sized
 * destination exists, so esz==0 is invalid.
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The high half is selected by offsetting the source by vsz/2. */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2341
d731d8cb
RH
2342/*
2343 *** SVE Permute - Predicates Group
2344 */
2345
/*
 * Common expansion for three-operand predicate permutes (ZIP/UZP/TRN on
 * predicates).  Predicate sizes may be smaller than SIMD_OPRSZ allows,
 * so a custom PREDDESC descriptor is built instead of simd_desc().
 * high_odd selects the high-half/odd-element variant via DATA.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    uint32_t desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));

    fn(t_d, t_n, t_m, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    return true;
}
2375
/*
 * Common expansion for two-operand predicate permutes (REV_p, PUNPK).
 * Same PREDDESC descriptor scheme as do_perm_pred3.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(t_d, t_n, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2401
/* ZIP1/ZIP2 (predicates): interleave low/high halves. */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

/* UZP1/UZP2 (predicates): concatenate even/odd elements. */
static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

/* TRN1/TRN2 (predicates): interleave even/odd element pairs. */
static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

/* REV (predicate): reverse all predicate elements. */
static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

/* PUNPKLO/PUNPKHI: unpack low/high half of a predicate to double width. */
static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
2446
234b48e9
RH
2447/*
2448 *** SVE Permute - Interleaving Group
2449 */
2450
/*
 * ZIP1/ZIP2 (vectors): interleave elements from the low (high == false)
 * or high (high == true) halves of Zn and Zm, selected by offsetting
 * both sources by vsz/2.
 */
static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2468
/* ZIP1 (vectors): interleave low halves. */
static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

/* ZIP2 (vectors): interleave high halves. */
static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}
2478
74b64b25
RH
/*
 * ZIP1/ZIP2 (quadwords, F64MM): interleave 128-bit granules.  The high
 * offset rounds the vector size down to a multiple of 32 bytes before
 * halving, matching the quadword granule layout.
 */
static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, gen_helper_sve2_zip_q);
    }
    return true;
}

static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, false);
}

static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, true);
}
2504
234b48e9
RH
/* UZP1/UZP2 (vectors): concatenate even/odd elements; the odd variant
 * passes the element size as DATA to offset the start element. */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 1 << a->esz)

/* Quadword (128-bit granule) UZP variants require F64MM; DATA is the
 * 16-byte granule offset for the odd form. */
TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 0)
TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 16)

/* TRN1/TRN2 (vectors): transpose even/odd element pairs. */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 0)
TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 0)
TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 16)
74b64b25 2534
3ca879ae
RH
2535/*
2536 *** SVE Permute Vector - Predicated Group
2537 */
2538
817bd5c9
RH
/* COMPACT: pack active elements to the low end; word/dword only. */
static gen_helper_gvec_3 * const compact_fns[4] = {
    NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
};
TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
3ca879ae 2543
ef23cb72
RH
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size. This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_p);
}
2565
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two size: the wrap is a simple mask. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select 0 when the incremented offset reaches vsz. */
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}
2582
/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* Masking maps the -1<<esz "not found" value to the top element. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select the offset of the final element explicitly. */
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}
2596
/* Load an unsigned element of ESZ from BASE+OFS.
 * Returns a new i64 temp owned by the caller.
 */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
2620
/* Load an unsigned element of ESZ from RM[LAST].
 * LAST is consumed (possibly modified for endianness) by this function.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering. See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2646
/* Compute CLAST for a Zreg: broadcast the selected element of Zm to Zd
 * when the predicate has an active element, else copy Zn to Zd.
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Local temp: the value must survive the branch below. */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        /* CLASTA selects the element after the last active one. */
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2694
3a7be554 2695static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2696{
2697 return do_clast_vector(s, a, false);
2698}
2699
3a7be554 2700static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2701{
2702 return do_clast_vector(s, a, true);
2703}
2704
/* Compute CLAST for a scalar: replace REG_VAL with the selected element
 * of Zm when the predicate has an active element, else keep REG_VAL.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage. We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
                        ele, reg_val);

    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2736
/* Compute CLAST for a Vreg: the scalar operand is the low element of Vd,
 * and the (possibly updated) result is written back zero-extended.
 */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

/* CLASTA (SIMD&FP scalar). */
static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}

/* CLASTB (SIMD&FP scalar). */
static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}
2761
/* Compute CLAST for a Xreg: the scalar operand is Xd, first narrowed
 * (zero-extended) to the element size.
 */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        /* Full 64-bit element: no narrowing needed. */
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

/* CLASTA (scalar). */
static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, false);
}

/* CLASTB (scalar). */
static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, true);
}
2801
/* Compute LAST for a scalar: unconditionally select the last (before)
 * or following (after) element offset, wrapping as required, and load it.
 * Returns a new i64 temp owned by the caller.
 */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
2820
/* Compute LAST for a Vreg: write the selected element to Vd. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

/* LASTA (SIMD&FP scalar). */
static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, false);
}

/* LASTB (SIMD&FP scalar). */
static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, true);
}
2841
/* Compute LAST for a Xreg: move the selected element into Xd. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

/* LASTA (scalar). */
static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, false);
}

/* LASTB (scalar). */
static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, true);
}
2862
/* CPY (scalar, merging): copy Xn (SP allowed) into active elements of Zd. */
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/* CPY (SIMD&FP scalar, merging): copy element 0 of Vn into active
 * elements of Zd. */
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
2881
817bd5c9
RH
/* REVB: byte-reverse within each element; smallest element is halfword. */
static gen_helper_gvec_3 * const revb_fns[4] = {
    NULL, gen_helper_sve_revb_h,
    gen_helper_sve_revb_s, gen_helper_sve_revb_d,
};
TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)

/* REVH: halfword-reverse within each word or doubleword element. */
static gen_helper_gvec_3 * const revh_fns[4] = {
    NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
};
TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)

/* REVW: word-reverse within doubleword elements only. */
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
dae8fb90 2895
/* SPLICE (SVE1): splice active elements of Zn with trailing elements
 * of Zm, under control of Pg. */
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
    return gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                             a->rd, a->rn, a->rm, a->pg, a->esz);
}

/* SPLICE (SVE2 constructive): second source is the register pair
 * successor of Zn, wrapping modulo 32. */
static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                             a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
}
2910
757f9cff
RH
2911/*
2912 *** SVE Integer Compare - Vectors Group
2913 */
2914
2915static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2916 gen_helper_gvec_flags_4 *gen_fn)
2917{
2918 TCGv_ptr pd, zn, zm, pg;
2919 unsigned vsz;
2920 TCGv_i32 t;
2921
2922 if (gen_fn == NULL) {
2923 return false;
2924 }
2925 if (!sve_access_check(s)) {
2926 return true;
2927 }
2928
2929 vsz = vec_full_reg_size(s);
392acacc 2930 t = tcg_temp_new_i32();
757f9cff
RH
2931 pd = tcg_temp_new_ptr();
2932 zn = tcg_temp_new_ptr();
2933 zm = tcg_temp_new_ptr();
2934 pg = tcg_temp_new_ptr();
2935
2936 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2937 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2938 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2939 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2940
392acacc 2941 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
757f9cff
RH
2942
2943 tcg_temp_free_ptr(pd);
2944 tcg_temp_free_ptr(zn);
2945 tcg_temp_free_ptr(zm);
2946 tcg_temp_free_ptr(pg);
2947
2948 do_pred_flags(t);
2949
2950 tcg_temp_free_i32(t);
2951 return true;
2952}
2953
/* Same-width compares: one helper per element size. */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_flags_4 * const fns[4] = { \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
    }; \
    return do_ppzz_flags(s, a, fns[a->esz]); \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/* Wide-element compares (Zm is doubleword): no dword form, hence NULL. */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_flags_4 * const fns[4] = { \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
        gen_helper_sve_##name##_ppzw_s, NULL \
    }; \
    return do_ppzz_flags(s, a, fns[a->esz]); \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
2995
38cadeba
RH
2996/*
2997 *** SVE Integer Compare - Immediate Groups
2998 */
2999
/*
 * Common expansion for predicated compare-with-immediate, setting NZCV.
 * The immediate rides in the simd_desc DATA field; a NULL helper marks
 * an invalid esz encoding.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    /* Commit the helper's computed NZCV. */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
3035
/* Compare-with-immediate: one helper per element size. */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
{ \
    static gen_helper_gvec_flags_3 * const fns[4] = { \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
    }; \
    return do_ppzi_flags(s, a, fns[a->esz]); \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
3058
35da316f
RH
3059/*
3060 *** SVE Partition Break Group
3061 */
3062
/*
 * Common expansion for BRKPA/BRKPB: three predicate operands.
 * When a->s is set the flag-setting helper is used and NZCV committed.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, m, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, m, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    return true;
}
3098
/*
 * Common expansion for BRKA/BRKB/BRKN: two predicate operands.
 * When a->s is set the flag-setting helper is used and NZCV committed.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    return true;
}
3131
/* BRKPA/BRKPB: break after/before first true, propagating from Pn/Pm.  */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

/* BRKA/BRKB, merging (_m) and zeroing (_z) forms.  */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

/* BRKN: propagate break to next partition.  */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3166
9ee3a611
RH
3167/*
3168 *** SVE Predicate Count Group
3169 */
3170
/*
 * Set @val to the number of active elements of size @esz in predicate
 * @pn, governed by predicate @pg.  For predicates no larger than 8 bytes
 * the count is computed inline with a masked popcount; otherwise the
 * out-of-line helper is used.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            /* Apply the governing predicate; skipped when pn == pg.  */
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));

        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}
3209
3a7be554 3210static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
3211{
3212 if (sve_access_check(s)) {
3213 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3214 }
3215 return true;
3216}
3217
3a7be554 3218static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3219{
3220 if (sve_access_check(s)) {
3221 TCGv_i64 reg = cpu_reg(s, a->rd);
3222 TCGv_i64 val = tcg_temp_new_i64();
3223
3224 do_cntp(s, val, a->esz, a->pg, a->pg);
3225 if (a->d) {
3226 tcg_gen_sub_i64(reg, reg, val);
3227 } else {
3228 tcg_gen_add_i64(reg, reg, val);
3229 }
3230 tcg_temp_free_i64(val);
3231 }
3232 return true;
3233}
3234
3a7be554 3235static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3236{
3237 if (a->esz == 0) {
3238 return false;
3239 }
3240 if (sve_access_check(s)) {
3241 unsigned vsz = vec_full_reg_size(s);
3242 TCGv_i64 val = tcg_temp_new_i64();
3243 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3244
3245 do_cntp(s, val, a->esz, a->pg, a->pg);
3246 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3247 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3248 }
3249 return true;
3250}
3251
3a7be554 3252static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3253{
3254 if (sve_access_check(s)) {
3255 TCGv_i64 reg = cpu_reg(s, a->rd);
3256 TCGv_i64 val = tcg_temp_new_i64();
3257
3258 do_cntp(s, val, a->esz, a->pg, a->pg);
3259 do_sat_addsub_32(reg, val, a->u, a->d);
3260 }
3261 return true;
3262}
3263
3a7be554 3264static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3265{
3266 if (sve_access_check(s)) {
3267 TCGv_i64 reg = cpu_reg(s, a->rd);
3268 TCGv_i64 val = tcg_temp_new_i64();
3269
3270 do_cntp(s, val, a->esz, a->pg, a->pg);
3271 do_sat_addsub_64(reg, val, a->u, a->d);
3272 }
3273 return true;
3274}
3275
3a7be554 3276static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3277{
3278 if (a->esz == 0) {
3279 return false;
3280 }
3281 if (sve_access_check(s)) {
3282 TCGv_i64 val = tcg_temp_new_i64();
3283 do_cntp(s, val, a->esz, a->pg, a->pg);
3284 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3285 }
3286 return true;
3287}
3288
caf1cefc
RH
3289/*
3290 *** SVE Integer Compare Scalars Group
3291 */
3292
/*
 * CTERMEQ/CTERMNE: compare scalars and set NZCV for the loop-termination
 * idiom.  N is set from the comparison result; V = !N & !C; Z and C are
 * left as set by a previous flag-setting instruction (e.g. WHILE).
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3317
/*
 * WHILE{LT,LE,LO,LS,GT,GE,HI,HS}: construct a predicate from a pair of
 * scalar loop bounds.  The condition is reduced to "number of iterations
 * for which it holds", bounded by the element count, and handed to the
 * whilel/whileg helper which materializes the predicate and returns NZCV.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit operands: extend according to signedness.  */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length. This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
3428
14f6dad1
RH
3429static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3430{
3431 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3432 TCGv_i32 t2;
14f6dad1
RH
3433 TCGv_ptr ptr;
3434 unsigned vsz = vec_full_reg_size(s);
3435 unsigned desc = 0;
3436
3437 if (!dc_isar_feature(aa64_sve2, s)) {
3438 return false;
3439 }
3440 if (!sve_access_check(s)) {
3441 return true;
3442 }
3443
3444 op0 = read_cpu_reg(s, a->rn, 1);
3445 op1 = read_cpu_reg(s, a->rm, 1);
3446
4481bbf2 3447 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3448 diff = tcg_temp_new_i64();
3449
3450 if (a->rw) {
3451 /* WHILERW */
3452 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3453 t1 = tcg_temp_new_i64();
3454 tcg_gen_sub_i64(diff, op0, op1);
3455 tcg_gen_sub_i64(t1, op1, op0);
3456 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3457 tcg_temp_free_i64(t1);
3458 /* Round down to a multiple of ESIZE. */
3459 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3460 /* If op1 == op0, diff == 0, and the condition is always true. */
3461 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3462 } else {
3463 /* WHILEWR */
3464 tcg_gen_sub_i64(diff, op1, op0);
3465 /* Round down to a multiple of ESIZE. */
3466 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3467 /* If op0 >= op1, diff <= 0, the condition is always true. */
3468 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3469 }
3470
3471 /* Bound to the maximum. */
3472 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3473
3474 /* Since we're bounded, pass as a 32-bit type. */
3475 t2 = tcg_temp_new_i32();
3476 tcg_gen_extrl_i64_i32(t2, diff);
3477 tcg_temp_free_i64(diff);
3478
3479 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3480 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3481
3482 ptr = tcg_temp_new_ptr();
3483 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3484
4481bbf2 3485 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3486 do_pred_flags(t2);
3487
3488 tcg_temp_free_ptr(ptr);
3489 tcg_temp_free_i32(t2);
14f6dad1
RH
3490 return true;
3491}
3492
ed491961
RH
3493/*
3494 *** SVE Integer Wide Immediate - Unpredicated Group
3495 */
3496
3a7be554 3497static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3498{
3499 if (a->esz == 0) {
3500 return false;
3501 }
3502 if (sve_access_check(s)) {
3503 unsigned vsz = vec_full_reg_size(s);
3504 int dofs = vec_full_reg_offset(s, a->rd);
3505 uint64_t imm;
3506
3507 /* Decode the VFP immediate. */
3508 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3509 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3510 }
3511 return true;
3512}
3513
3a7be554 3514static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3515{
3a7be554 3516 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
ed491961
RH
3517 return false;
3518 }
3519 if (sve_access_check(s)) {
3520 unsigned vsz = vec_full_reg_size(s);
3521 int dofs = vec_full_reg_offset(s, a->rd);
3522
8711e71f 3523 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3524 }
3525 return true;
3526}
3527
3a7be554 3528static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3529{
3a7be554 3530 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3531 return false;
3532 }
3533 if (sve_access_check(s)) {
3534 unsigned vsz = vec_full_reg_size(s);
3535 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3536 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3537 }
3538 return true;
3539}
3540
/*
 * SUB (vector, immediate): implemented as ADD of the negated immediate.
 * NOTE(review): this mutates a->imm before forwarding — presumably safe
 * because the decoded arg struct is per-instruction; confirm if reused.
 */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3546
/*
 * SUBR (vector, immediate): reversed subtract, Zd = imm - Zn.
 * Expanded with tcg_gen_gvec_2s using scalar_first so the immediate
 * is the first operand of the subtraction.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    /* One expander per element size; indexed by a->esz.  */
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* Reject the shifted-immediate encoding (insn bit 13) for bytes.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}
3589
3a7be554 3590static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3591{
3592 if (sve_access_check(s)) {
3593 unsigned vsz = vec_full_reg_size(s);
3594 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3595 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3596 }
3597 return true;
3598}
3599
/*
 * Common expansion for saturating add/sub with immediate.
 * @u selects unsigned saturation, @d selects subtraction.
 */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    /* Reject the shifted-immediate encoding (insn bit 13) for bytes.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                          tcg_constant_i64(a->imm), u, d);
    }
    return true;
}
3611
/* SQADD/UQADD/SQSUB/UQSUB (immediate): dispatch on (unsigned, subtract).  */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}

static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}

static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}

static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
3631
3632static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3633{
3634 if (sve_access_check(s)) {
3635 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3636 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3637 vec_full_reg_offset(s, a->rn),
138a1f7b 3638 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
6e6a157d
RH
3639 }
3640 return true;
3641}
3642
/* SMAX/UMAX/SMIN/UMIN (immediate): one out-of-line helper per esz.  */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3659
/* DOT product helpers indexed by [unsigned][element size: b=0, h=1].  */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
d730ecaa 3666
814d4c52
RH
3667/*
3668 * SVE Multiply - Indexed
3669 */
3670
/*
 * Indexed dot products: the _s forms accumulate 4 byte products into
 * 32-bit lanes (idx_b helpers), the _d forms accumulate 4 halfword
 * products into 64-bit lanes (idx_h helpers).
 */
TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

/* Mixed-sign dot products require the I8MM extension.  */
TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)
16fcfdc7 3684
/* Multiply by indexed element; the index is passed as simd_data.  */
#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX

/*
 * Widening multiply by indexed element: TOP selects the top (odd) or
 * bottom (even) half of the source lanes; it is packed into bit 0 of
 * simd_data alongside the index.
 */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
3723
/* Multiply-accumulate by indexed element (SVE2).  */
#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR

/*
 * Widening multiply-accumulate by indexed element: TOP (top/bottom half
 * selection) is packed into bit 0 of simd_data alongside the index.
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB

/*
 * Complex multiply-accumulate by indexed element: the rotation is
 * packed into bits [1:0] of simd_data alongside the index.
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
3795
ca40a6e6
RH
3796/*
3797 *** SVE Floating Point Multiply-Add Indexed Group
3798 */
3799
0a82d963 3800static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
ca40a6e6
RH
3801{
3802 static gen_helper_gvec_4_ptr * const fns[3] = {
3803 gen_helper_gvec_fmla_idx_h,
3804 gen_helper_gvec_fmla_idx_s,
3805 gen_helper_gvec_fmla_idx_d,
3806 };
3807
3808 if (sve_access_check(s)) {
3809 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3810 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3811 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3812 vec_full_reg_offset(s, a->rn),
3813 vec_full_reg_offset(s, a->rm),
3814 vec_full_reg_offset(s, a->ra),
0a82d963 3815 status, vsz, vsz, (a->index << 1) | sub,
ca40a6e6
RH
3816 fns[a->esz - 1]);
3817 tcg_temp_free_ptr(status);
3818 }
3819 return true;
3820}
3821
/* FMLA/FMLS (indexed): select multiply-add vs multiply-subtract.  */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, false);
}

static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, true);
}
3831
ca40a6e6
RH
3832/*
3833 *** SVE Floating Point Multiply Indexed Group
3834 */
3835
3a7be554 3836static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
ca40a6e6
RH
3837{
3838 static gen_helper_gvec_3_ptr * const fns[3] = {
3839 gen_helper_gvec_fmul_idx_h,
3840 gen_helper_gvec_fmul_idx_s,
3841 gen_helper_gvec_fmul_idx_d,
3842 };
3843
3844 if (sve_access_check(s)) {
3845 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3846 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3847 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3848 vec_full_reg_offset(s, a->rn),
3849 vec_full_reg_offset(s, a->rm),
3850 status, vsz, vsz, a->index, fns[a->esz - 1]);
3851 tcg_temp_free_ptr(status);
3852 }
3853 return true;
3854}
3855
23fbe79f
RH
3856/*
3857 *** SVE Floating Point Fast Reduction Group
3858 */
3859
typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/*
 * Expand an FP horizontal reduction: @fn folds Zn under Pg and the
 * scalar result is written to the FP register Vd.  The descriptor's
 * data field carries pow2ceil(vsz) for the helper's tree reduction.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
3888
/* FP horizontal reductions; byte elements (esz == 0) are invalid.  */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_fp_reduce * const fns[3] = {                       \
        gen_helper_sve_##name##_h,                                       \
        gen_helper_sve_##name##_s,                                       \
        gen_helper_sve_##name##_d,                                       \
    };                                                                   \
    if (a->esz == 0) {                                                   \
        return false;                                                    \
    }                                                                    \
    if (sve_access_check(s)) {                                           \
        do_reduce(s, a, fns[a->esz - 1]);                                \
    }                                                                    \
    return true;                                                         \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
3911
3887c038
RH
3912/*
3913 *** SVE Floating Point Unary Operations - Unpredicated Group
3914 */
3915
3916static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3917{
3918 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3919 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
3920
3921 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3922 vec_full_reg_offset(s, a->rn),
3923 status, vsz, vsz, 0, fn);
3924 tcg_temp_free_ptr(status);
3925}
3926
3a7be554 3927static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3928{
3929 static gen_helper_gvec_2_ptr * const fns[3] = {
3930 gen_helper_gvec_frecpe_h,
3931 gen_helper_gvec_frecpe_s,
3932 gen_helper_gvec_frecpe_d,
3933 };
3934 if (a->esz == 0) {
3935 return false;
3936 }
3937 if (sve_access_check(s)) {
3938 do_zz_fp(s, a, fns[a->esz - 1]);
3939 }
3940 return true;
3941}
3942
3a7be554 3943static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3944{
3945 static gen_helper_gvec_2_ptr * const fns[3] = {
3946 gen_helper_gvec_frsqrte_h,
3947 gen_helper_gvec_frsqrte_s,
3948 gen_helper_gvec_frsqrte_d,
3949 };
3950 if (a->esz == 0) {
3951 return false;
3952 }
3953 if (sve_access_check(s)) {
3954 do_zz_fp(s, a, fns[a->esz - 1]);
3955 }
3956 return true;
3957}
3958
4d2e2a03
RH
3959/*
3960 *** SVE Floating Point Compare with Zero Group
3961 */
3962
3963static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3964 gen_helper_gvec_3_ptr *fn)
3965{
3966 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3967 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
3968
3969 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3970 vec_full_reg_offset(s, a->rn),
3971 pred_full_reg_offset(s, a->pg),
3972 status, vsz, vsz, 0, fn);
3973 tcg_temp_free_ptr(status);
3974}
3975
/* FP compare-with-zero group; byte elements (esz == 0) are invalid.  */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
4001
67fcd9ad
RH
4002/*
4003 *** SVE floating-point trig multiply-add coefficient
4004 */
4005
3a7be554 4006static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
4007{
4008 static gen_helper_gvec_3_ptr * const fns[3] = {
4009 gen_helper_sve_ftmad_h,
4010 gen_helper_sve_ftmad_s,
4011 gen_helper_sve_ftmad_d,
4012 };
4013
4014 if (a->esz == 0) {
4015 return false;
4016 }
4017 if (sve_access_check(s)) {
4018 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4019 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
4020 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4021 vec_full_reg_offset(s, a->rn),
4022 vec_full_reg_offset(s, a->rm),
4023 status, vsz, vsz, a->imm, fns[a->esz - 1]);
4024 tcg_temp_free_ptr(status);
4025 }
4026 return true;
4027}
4028
7f9ddf64
RH
4029/*
4030 *** SVE Floating Point Accumulating Reduction Group
4031 */
4032
/*
 * FADDA: strictly-ordered FP add reduction.  The initial value is taken
 * from element 0 of Vn, accumulated across the active elements of Zm,
 * and the scalar result written back to Vd.  Byte elements are invalid.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Load the scalar start value from element 0 of Zn.  */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
4072
29b80469
RH
4073/*
4074 *** SVE Floating Point Arithmetic - Unpredicated Group
4075 */
4076
4077static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
4078 gen_helper_gvec_3_ptr *fn)
4079{
4080 if (fn == NULL) {
4081 return false;
4082 }
4083 if (sve_access_check(s)) {
4084 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4085 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
4086 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4087 vec_full_reg_offset(s, a->rn),
4088 vec_full_reg_offset(s, a->rm),
4089 status, vsz, vsz, 0, fn);
4090 tcg_temp_free_ptr(status);
4091 }
4092 return true;
4093}
4094
4095
4096#define DO_FP3(NAME, name) \
3a7be554 4097static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
4098{ \
4099 static gen_helper_gvec_3_ptr * const fns[4] = { \
4100 NULL, gen_helper_gvec_##name##_h, \
4101 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
4102 }; \
4103 return do_zzz_fp(s, a, fns[a->esz]); \
4104}
4105
4106DO_FP3(FADD_zzz, fadd)
4107DO_FP3(FSUB_zzz, fsub)
4108DO_FP3(FMUL_zzz, fmul)
4109DO_FP3(FTSMUL, ftsmul)
4110DO_FP3(FRECPS, recps)
4111DO_FP3(FRSQRTS, rsqrts)
4112
4113#undef DO_FP3
4114
ec3b87c2
RH
4115/*
4116 *** SVE Floating Point Arithmetic - Predicated Group
4117 */
4118
4119static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
4120 gen_helper_gvec_4_ptr *fn)
4121{
4122 if (fn == NULL) {
4123 return false;
4124 }
4125 if (sve_access_check(s)) {
4126 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4127 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
4128 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4129 vec_full_reg_offset(s, a->rn),
4130 vec_full_reg_offset(s, a->rm),
4131 pred_full_reg_offset(s, a->pg),
4132 status, vsz, vsz, 0, fn);
4133 tcg_temp_free_ptr(status);
4134 }
4135 return true;
4136}
4137
4138#define DO_FP3(NAME, name) \
3a7be554 4139static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
4140{ \
4141 static gen_helper_gvec_4_ptr * const fns[4] = { \
4142 NULL, gen_helper_sve_##name##_h, \
4143 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4144 }; \
4145 return do_zpzz_fp(s, a, fns[a->esz]); \
4146}
4147
4148DO_FP3(FADD_zpzz, fadd)
4149DO_FP3(FSUB_zpzz, fsub)
4150DO_FP3(FMUL_zpzz, fmul)
4151DO_FP3(FMIN_zpzz, fmin)
4152DO_FP3(FMAX_zpzz, fmax)
4153DO_FP3(FMINNM_zpzz, fminnum)
4154DO_FP3(FMAXNM_zpzz, fmaxnum)
4155DO_FP3(FABD, fabd)
4156DO_FP3(FSCALE, fscalbn)
4157DO_FP3(FDIV, fdiv)
4158DO_FP3(FMULX, fmulx)
4159
4160#undef DO_FP3
8092c6a3 4161
cc48affe
RH
4162typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
4163 TCGv_i64, TCGv_ptr, TCGv_i32);
4164
4165static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
4166 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
4167{
4168 unsigned vsz = vec_full_reg_size(s);
4169 TCGv_ptr t_zd, t_zn, t_pg, status;
4170 TCGv_i32 desc;
4171
4172 t_zd = tcg_temp_new_ptr();
4173 t_zn = tcg_temp_new_ptr();
4174 t_pg = tcg_temp_new_ptr();
4175 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
4176 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
4177 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4178
cdfb22bb 4179 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
c6a59b55 4180 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
cc48affe
RH
4181 fn(t_zd, t_zn, t_pg, scalar, status, desc);
4182
cc48affe
RH
4183 tcg_temp_free_ptr(status);
4184 tcg_temp_free_ptr(t_pg);
4185 tcg_temp_free_ptr(t_zn);
4186 tcg_temp_free_ptr(t_zd);
4187}
4188
4189static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4190 gen_helper_sve_fp2scalar *fn)
4191{
138a1f7b
RH
4192 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4193 tcg_constant_i64(imm), fn);
cc48affe
RH
4194}
4195
4196#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 4197static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
4198{ \
4199 static gen_helper_sve_fp2scalar * const fns[3] = { \
4200 gen_helper_sve_##name##_h, \
4201 gen_helper_sve_##name##_s, \
4202 gen_helper_sve_##name##_d \
4203 }; \
4204 static uint64_t const val[3][2] = { \
4205 { float16_##const0, float16_##const1 }, \
4206 { float32_##const0, float32_##const1 }, \
4207 { float64_##const0, float64_##const1 }, \
4208 }; \
4209 if (a->esz == 0) { \
4210 return false; \
4211 } \
4212 if (sve_access_check(s)) { \
4213 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
4214 } \
4215 return true; \
4216}
4217
cc48affe
RH
4218DO_FP_IMM(FADD, fadds, half, one)
4219DO_FP_IMM(FSUB, fsubs, half, one)
4220DO_FP_IMM(FMUL, fmuls, half, two)
4221DO_FP_IMM(FSUBR, fsubrs, half, one)
4222DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
4223DO_FP_IMM(FMINNM, fminnms, zero, one)
4224DO_FP_IMM(FMAX, fmaxs, zero, one)
4225DO_FP_IMM(FMIN, fmins, zero, one)
4226
4227#undef DO_FP_IMM
4228
abfdefd5
RH
4229static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4230 gen_helper_gvec_4_ptr *fn)
4231{
4232 if (fn == NULL) {
4233 return false;
4234 }
4235 if (sve_access_check(s)) {
4236 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4237 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4238 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4239 vec_full_reg_offset(s, a->rn),
4240 vec_full_reg_offset(s, a->rm),
4241 pred_full_reg_offset(s, a->pg),
4242 status, vsz, vsz, 0, fn);
4243 tcg_temp_free_ptr(status);
4244 }
4245 return true;
4246}
4247
4248#define DO_FPCMP(NAME, name) \
3a7be554 4249static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
4250{ \
4251 static gen_helper_gvec_4_ptr * const fns[4] = { \
4252 NULL, gen_helper_sve_##name##_h, \
4253 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4254 }; \
4255 return do_fp_cmp(s, a, fns[a->esz]); \
4256}
4257
4258DO_FPCMP(FCMGE, fcmge)
4259DO_FPCMP(FCMGT, fcmgt)
4260DO_FPCMP(FCMEQ, fcmeq)
4261DO_FPCMP(FCMNE, fcmne)
4262DO_FPCMP(FCMUO, fcmuo)
4263DO_FPCMP(FACGE, facge)
4264DO_FPCMP(FACGT, facgt)
4265
4266#undef DO_FPCMP
4267
3a7be554 4268static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
4269{
4270 static gen_helper_gvec_4_ptr * const fns[3] = {
4271 gen_helper_sve_fcadd_h,
4272 gen_helper_sve_fcadd_s,
4273 gen_helper_sve_fcadd_d
4274 };
4275
4276 if (a->esz == 0) {
4277 return false;
4278 }
4279 if (sve_access_check(s)) {
4280 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4281 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
4282 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4283 vec_full_reg_offset(s, a->rn),
4284 vec_full_reg_offset(s, a->rm),
4285 pred_full_reg_offset(s, a->pg),
4286 status, vsz, vsz, a->rot, fns[a->esz - 1]);
4287 tcg_temp_free_ptr(status);
4288 }
4289 return true;
4290}
4291
08975da9
RH
4292static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
4293 gen_helper_gvec_5_ptr *fn)
6ceabaad 4294{
08975da9 4295 if (a->esz == 0) {
6ceabaad
RH
4296 return false;
4297 }
08975da9
RH
4298 if (sve_access_check(s)) {
4299 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4300 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4301 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4302 vec_full_reg_offset(s, a->rn),
4303 vec_full_reg_offset(s, a->rm),
4304 vec_full_reg_offset(s, a->ra),
4305 pred_full_reg_offset(s, a->pg),
4306 status, vsz, vsz, 0, fn);
4307 tcg_temp_free_ptr(status);
6ceabaad 4308 }
6ceabaad
RH
4309 return true;
4310}
4311
4312#define DO_FMLA(NAME, name) \
3a7be554 4313static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 4314{ \
08975da9 4315 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
4316 NULL, gen_helper_sve_##name##_h, \
4317 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4318 }; \
4319 return do_fmla(s, a, fns[a->esz]); \
4320}
4321
4322DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4323DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4324DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4325DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4326
4327#undef DO_FMLA
4328
3a7be554 4329static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 4330{
08975da9
RH
4331 static gen_helper_gvec_5_ptr * const fns[4] = {
4332 NULL,
05f48bab
RH
4333 gen_helper_sve_fcmla_zpzzz_h,
4334 gen_helper_sve_fcmla_zpzzz_s,
4335 gen_helper_sve_fcmla_zpzzz_d,
4336 };
4337
4338 if (a->esz == 0) {
4339 return false;
4340 }
4341 if (sve_access_check(s)) {
4342 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4343 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4344 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4345 vec_full_reg_offset(s, a->rn),
4346 vec_full_reg_offset(s, a->rm),
4347 vec_full_reg_offset(s, a->ra),
4348 pred_full_reg_offset(s, a->pg),
4349 status, vsz, vsz, a->rot, fns[a->esz]);
4350 tcg_temp_free_ptr(status);
05f48bab
RH
4351 }
4352 return true;
4353}
4354
3a7be554 4355static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405 4356{
636ddeb1 4357 static gen_helper_gvec_4_ptr * const fns[2] = {
18fc2405
RH
4358 gen_helper_gvec_fcmlah_idx,
4359 gen_helper_gvec_fcmlas_idx,
4360 };
4361
4362 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4363 tcg_debug_assert(a->rd == a->ra);
4364 if (sve_access_check(s)) {
4365 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4366 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
636ddeb1 4367 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
18fc2405
RH
4368 vec_full_reg_offset(s, a->rn),
4369 vec_full_reg_offset(s, a->rm),
636ddeb1 4370 vec_full_reg_offset(s, a->ra),
18fc2405
RH
4371 status, vsz, vsz,
4372 a->index * 4 + a->rot,
4373 fns[a->esz - 1]);
4374 tcg_temp_free_ptr(status);
4375 }
4376 return true;
4377}
4378
8092c6a3
RH
4379/*
4380 *** SVE Floating Point Unary Operations Predicated Group
4381 */
4382
4383static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4384 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4385{
4386 if (sve_access_check(s)) {
4387 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4388 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
4389 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4390 vec_full_reg_offset(s, rn),
4391 pred_full_reg_offset(s, pg),
4392 status, vsz, vsz, 0, fn);
4393 tcg_temp_free_ptr(status);
4394 }
4395 return true;
4396}
4397
3a7be554 4398static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4399{
e4ab5124 4400 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
4401}
4402
3a7be554 4403static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4404{
4405 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4406}
4407
d29b17ca
RH
4408static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
4409{
4410 if (!dc_isar_feature(aa64_sve_bf16, s)) {
4411 return false;
4412 }
4413 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
4414}
4415
3a7be554 4416static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4417{
e4ab5124 4418 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
4419}
4420
3a7be554 4421static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4422{
4423 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4424}
4425
3a7be554 4426static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4427{
4428 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4429}
4430
3a7be554 4431static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4432{
4433 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4434}
4435
3a7be554 4436static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4437{
4438 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4439}
4440
3a7be554 4441static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4442{
4443 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4444}
4445
3a7be554 4446static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4447{
4448 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4449}
4450
3a7be554 4451static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4452{
4453 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4454}
4455
3a7be554 4456static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4457{
4458 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4459}
4460
3a7be554 4461static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4462{
4463 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4464}
4465
3a7be554 4466static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4467{
4468 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4469}
4470
3a7be554 4471static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4472{
4473 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4474}
4475
3a7be554 4476static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4477{
4478 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4479}
4480
3a7be554 4481static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4482{
4483 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4484}
4485
3a7be554 4486static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4487{
4488 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4489}
4490
3a7be554 4491static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4492{
4493 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4494}
4495
3a7be554 4496static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4497{
4498 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4499}
4500
3a7be554 4501static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4502{
4503 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4504}
4505
cda3c753
RH
4506static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4507 gen_helper_sve_frint_h,
4508 gen_helper_sve_frint_s,
4509 gen_helper_sve_frint_d
4510};
4511
3a7be554 4512static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4513{
4514 if (a->esz == 0) {
4515 return false;
4516 }
4517 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4518 frint_fns[a->esz - 1]);
4519}
4520
3a7be554 4521static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4522{
4523 static gen_helper_gvec_3_ptr * const fns[3] = {
4524 gen_helper_sve_frintx_h,
4525 gen_helper_sve_frintx_s,
4526 gen_helper_sve_frintx_d
4527 };
4528 if (a->esz == 0) {
4529 return false;
4530 }
4531 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4532}
4533
95365277
SL
4534static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4535 int mode, gen_helper_gvec_3_ptr *fn)
cda3c753 4536{
cda3c753
RH
4537 if (sve_access_check(s)) {
4538 unsigned vsz = vec_full_reg_size(s);
4539 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4540 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4541
4542 gen_helper_set_rmode(tmode, tmode, status);
4543
4544 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4545 vec_full_reg_offset(s, a->rn),
4546 pred_full_reg_offset(s, a->pg),
95365277 4547 status, vsz, vsz, 0, fn);
cda3c753
RH
4548
4549 gen_helper_set_rmode(tmode, tmode, status);
4550 tcg_temp_free_i32(tmode);
4551 tcg_temp_free_ptr(status);
4552 }
4553 return true;
4554}
4555
3a7be554 4556static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753 4557{
95365277
SL
4558 if (a->esz == 0) {
4559 return false;
4560 }
4561 return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
cda3c753
RH
4562}
4563
3a7be554 4564static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753 4565{
95365277
SL
4566 if (a->esz == 0) {
4567 return false;
4568 }
4569 return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
cda3c753
RH
4570}
4571
3a7be554 4572static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753 4573{
95365277
SL
4574 if (a->esz == 0) {
4575 return false;
4576 }
4577 return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
cda3c753
RH
4578}
4579
3a7be554 4580static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753 4581{
95365277
SL
4582 if (a->esz == 0) {
4583 return false;
4584 }
4585 return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
cda3c753
RH
4586}
4587
3a7be554 4588static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753 4589{
95365277
SL
4590 if (a->esz == 0) {
4591 return false;
4592 }
4593 return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
cda3c753
RH
4594}
4595
3a7be554 4596static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4597{
4598 static gen_helper_gvec_3_ptr * const fns[3] = {
4599 gen_helper_sve_frecpx_h,
4600 gen_helper_sve_frecpx_s,
4601 gen_helper_sve_frecpx_d
4602 };
4603 if (a->esz == 0) {
4604 return false;
4605 }
4606 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4607}
4608
3a7be554 4609static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4610{
4611 static gen_helper_gvec_3_ptr * const fns[3] = {
4612 gen_helper_sve_fsqrt_h,
4613 gen_helper_sve_fsqrt_s,
4614 gen_helper_sve_fsqrt_d
4615 };
4616 if (a->esz == 0) {
4617 return false;
4618 }
4619 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4620}
4621
3a7be554 4622static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4623{
4624 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4625}
4626
3a7be554 4627static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4628{
4629 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4630}
4631
3a7be554 4632static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4633{
4634 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4635}
4636
3a7be554 4637static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4638{
4639 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4640}
4641
3a7be554 4642static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4643{
4644 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4645}
4646
3a7be554 4647static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4648{
4649 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4650}
4651
3a7be554 4652static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4653{
4654 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4655}
4656
3a7be554 4657static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4658{
4659 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4660}
4661
3a7be554 4662static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4663{
4664 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4665}
4666
3a7be554 4667static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4668{
4669 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4670}
4671
3a7be554 4672static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4673{
4674 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4675}
4676
3a7be554 4677static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4678{
4679 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4680}
4681
3a7be554 4682static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4683{
4684 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4685}
4686
3a7be554 4687static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4688{
4689 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4690}
4691
d1822297
RH
4692/*
4693 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4694 */
4695
4696/* Subroutine loading a vector register at VOFS of LEN bytes.
4697 * The load should begin at the address Rn + IMM.
4698 */
4699
19f2acc9 4700static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4701{
19f2acc9
RH
4702 int len_align = QEMU_ALIGN_DOWN(len, 8);
4703 int len_remain = len % 8;
4704 int nparts = len / 8 + ctpop8(len_remain);
d1822297 4705 int midx = get_mem_index(s);
b2aa8879 4706 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4707
b2aa8879
RH
4708 dirty_addr = tcg_temp_new_i64();
4709 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4710 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
b2aa8879 4711 tcg_temp_free_i64(dirty_addr);
d1822297 4712
b2aa8879
RH
4713 /*
4714 * Note that unpredicated load/store of vector/predicate registers
d1822297 4715 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4716 * operations on larger quantities.
d1822297
RH
4717 * Attempt to keep code expansion to a minimum by limiting the
4718 * amount of unrolling done.
4719 */
4720 if (nparts <= 4) {
4721 int i;
4722
b2aa8879 4723 t0 = tcg_temp_new_i64();
d1822297 4724 for (i = 0; i < len_align; i += 8) {
fc313c64 4725 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
d1822297 4726 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 4727 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4728 }
b2aa8879 4729 tcg_temp_free_i64(t0);
d1822297
RH
4730 } else {
4731 TCGLabel *loop = gen_new_label();
4732 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4733
b2aa8879
RH
4734 /* Copy the clean address into a local temp, live across the loop. */
4735 t0 = clean_addr;
4b4dc975 4736 clean_addr = new_tmp_a64_local(s);
b2aa8879 4737 tcg_gen_mov_i64(clean_addr, t0);
d1822297 4738
b2aa8879 4739 gen_set_label(loop);
d1822297 4740
b2aa8879 4741 t0 = tcg_temp_new_i64();
fc313c64 4742 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
b2aa8879 4743 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4744
b2aa8879 4745 tp = tcg_temp_new_ptr();
d1822297
RH
4746 tcg_gen_add_ptr(tp, cpu_env, i);
4747 tcg_gen_addi_ptr(i, i, 8);
4748 tcg_gen_st_i64(t0, tp, vofs);
4749 tcg_temp_free_ptr(tp);
b2aa8879 4750 tcg_temp_free_i64(t0);
d1822297
RH
4751
4752 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4753 tcg_temp_free_ptr(i);
4754 }
4755
b2aa8879
RH
4756 /*
4757 * Predicate register loads can be any multiple of 2.
d1822297
RH
4758 * Note that we still store the entire 64-bit unit into cpu_env.
4759 */
4760 if (len_remain) {
b2aa8879 4761 t0 = tcg_temp_new_i64();
d1822297
RH
4762 switch (len_remain) {
4763 case 2:
4764 case 4:
4765 case 8:
b2aa8879
RH
4766 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4767 MO_LE | ctz32(len_remain));
d1822297
RH
4768 break;
4769
4770 case 6:
4771 t1 = tcg_temp_new_i64();
b2aa8879
RH
4772 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4773 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4774 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
4775 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4776 tcg_temp_free_i64(t1);
4777 break;
4778
4779 default:
4780 g_assert_not_reached();
4781 }
4782 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 4783 tcg_temp_free_i64(t0);
d1822297 4784 }
d1822297
RH
4785}
4786
5047c204 4787/* Similarly for stores. */
19f2acc9 4788static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 4789{
19f2acc9
RH
4790 int len_align = QEMU_ALIGN_DOWN(len, 8);
4791 int len_remain = len % 8;
4792 int nparts = len / 8 + ctpop8(len_remain);
5047c204 4793 int midx = get_mem_index(s);
bba87d0a 4794 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 4795
bba87d0a
RH
4796 dirty_addr = tcg_temp_new_i64();
4797 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4798 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
bba87d0a 4799 tcg_temp_free_i64(dirty_addr);
5047c204
RH
4800
4801 /* Note that unpredicated load/store of vector/predicate registers
4802 * are defined as a stream of bytes, which equates to little-endian
4803 * operations on larger quantities. There is no nice way to force
4804 * a little-endian store for aarch64_be-linux-user out of line.
4805 *
4806 * Attempt to keep code expansion to a minimum by limiting the
4807 * amount of unrolling done.
4808 */
4809 if (nparts <= 4) {
4810 int i;
4811
bba87d0a 4812 t0 = tcg_temp_new_i64();
5047c204
RH
4813 for (i = 0; i < len_align; i += 8) {
4814 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
fc313c64 4815 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
d8227b09 4816 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 4817 }
bba87d0a 4818 tcg_temp_free_i64(t0);
5047c204
RH
4819 } else {
4820 TCGLabel *loop = gen_new_label();
bba87d0a 4821 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 4822
bba87d0a
RH
4823 /* Copy the clean address into a local temp, live across the loop. */
4824 t0 = clean_addr;
4b4dc975 4825 clean_addr = new_tmp_a64_local(s);
bba87d0a 4826 tcg_gen_mov_i64(clean_addr, t0);
5047c204 4827
bba87d0a 4828 gen_set_label(loop);
5047c204 4829
bba87d0a
RH
4830 t0 = tcg_temp_new_i64();
4831 tp = tcg_temp_new_ptr();
4832 tcg_gen_add_ptr(tp, cpu_env, i);
4833 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 4834 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
4835 tcg_temp_free_ptr(tp);
4836
fc313c64 4837 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
bba87d0a
RH
4838 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4839 tcg_temp_free_i64(t0);
5047c204
RH
4840
4841 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4842 tcg_temp_free_ptr(i);
4843 }
4844
4845 /* Predicate register stores can be any multiple of 2. */
4846 if (len_remain) {
bba87d0a 4847 t0 = tcg_temp_new_i64();
5047c204 4848 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
4849
4850 switch (len_remain) {
4851 case 2:
4852 case 4:
4853 case 8:
bba87d0a
RH
4854 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4855 MO_LE | ctz32(len_remain));
5047c204
RH
4856 break;
4857
4858 case 6:
bba87d0a
RH
4859 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4860 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 4861 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 4862 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
4863 break;
4864
4865 default:
4866 g_assert_not_reached();
4867 }
bba87d0a 4868 tcg_temp_free_i64(t0);
5047c204 4869 }
5047c204
RH
4870}
4871
3a7be554 4872static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4873{
4874 if (sve_access_check(s)) {
4875 int size = vec_full_reg_size(s);
4876 int off = vec_full_reg_offset(s, a->rd);
4877 do_ldr(s, off, size, a->rn, a->imm * size);
4878 }
4879 return true;
4880}
4881
3a7be554 4882static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4883{
4884 if (sve_access_check(s)) {
4885 int size = pred_full_reg_size(s);
4886 int off = pred_full_reg_offset(s, a->rd);
4887 do_ldr(s, off, size, a->rn, a->imm * size);
4888 }
4889 return true;
4890}
c4e7c493 4891
3a7be554 4892static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4893{
4894 if (sve_access_check(s)) {
4895 int size = vec_full_reg_size(s);
4896 int off = vec_full_reg_offset(s, a->rd);
4897 do_str(s, off, size, a->rn, a->imm * size);
4898 }
4899 return true;
4900}
4901
3a7be554 4902static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4903{
4904 if (sve_access_check(s)) {
4905 int size = pred_full_reg_size(s);
4906 int off = pred_full_reg_offset(s, a->rd);
4907 do_str(s, off, size, a->rn, a->imm * size);
4908 }
4909 return true;
4910}
4911
c4e7c493
RH
4912/*
4913 *** SVE Memory - Contiguous Load Group
4914 */
4915
4916/* The memory mode of the dtype. */
14776ab5 4917static const MemOp dtype_mop[16] = {
c4e7c493
RH
4918 MO_UB, MO_UB, MO_UB, MO_UB,
4919 MO_SL, MO_UW, MO_UW, MO_UW,
4920 MO_SW, MO_SW, MO_UL, MO_UL,
fc313c64 4921 MO_SB, MO_SB, MO_SB, MO_UQ
c4e7c493
RH
4922};
4923
4924#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4925
4926/* The vector element size of dtype. */
4927static const uint8_t dtype_esz[16] = {
4928 0, 1, 2, 3,
4929 3, 1, 2, 3,
4930 3, 2, 2, 3,
4931 3, 2, 1, 3
4932};
4933
4934static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
4935 int dtype, uint32_t mte_n, bool is_write,
4936 gen_helper_gvec_mem *fn)
c4e7c493
RH
4937{
4938 unsigned vsz = vec_full_reg_size(s);
4939 TCGv_ptr t_pg;
206adacf 4940 int desc = 0;
c4e7c493 4941
206adacf
RH
4942 /*
4943 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
4944 * registers as pointers, so encode the regno into the data field.
4945 * For consistency, do this even for LD1.
4946 */
9473d0ec 4947 if (s->mte_active[0]) {
206adacf
RH
4948 int msz = dtype_msz(dtype);
4949
4950 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4951 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4952 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4953 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 4954 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
206adacf 4955 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
4956 } else {
4957 addr = clean_data_tbi(s, addr);
206adacf 4958 }
9473d0ec 4959
206adacf 4960 desc = simd_desc(vsz, vsz, zt | desc);
c4e7c493
RH
4961 t_pg = tcg_temp_new_ptr();
4962
4963 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
c6a59b55 4964 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));
c4e7c493
RH
4965
4966 tcg_temp_free_ptr(t_pg);
c4e7c493
RH
4967}
4968
c182c6db
RH
4969/* Indexed by [mte][be][dtype][nreg] */
4970static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
4971 { /* mte inactive, little-endian */
4972 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4973 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4974 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4975 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4976 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4977
4978 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4979 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4980 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4981 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4982 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4983
4984 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4985 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4986 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4987 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4988 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4989
4990 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4991 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4992 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4993 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4994 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4995
4996 /* mte inactive, big-endian */
4997 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4998 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4999 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
5000 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
5001 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
5002
5003 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
5004 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
5005 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
5006 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
5007 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
5008
5009 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
5010 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
5011 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
5012 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
5013 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
5014
5015 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
5016 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
5017 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
5018 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
5019 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
5020
5021 { /* mte active, little-endian */
5022 { { gen_helper_sve_ld1bb_r_mte,
5023 gen_helper_sve_ld2bb_r_mte,
5024 gen_helper_sve_ld3bb_r_mte,
5025 gen_helper_sve_ld4bb_r_mte },
5026 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
5027 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
5028 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
5029
5030 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
5031 { gen_helper_sve_ld1hh_le_r_mte,
5032 gen_helper_sve_ld2hh_le_r_mte,
5033 gen_helper_sve_ld3hh_le_r_mte,
5034 gen_helper_sve_ld4hh_le_r_mte },
5035 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
5036 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
5037
5038 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
5039 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
5040 { gen_helper_sve_ld1ss_le_r_mte,
5041 gen_helper_sve_ld2ss_le_r_mte,
5042 gen_helper_sve_ld3ss_le_r_mte,
5043 gen_helper_sve_ld4ss_le_r_mte },
5044 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
5045
5046 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
5047 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
5048 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
5049 { gen_helper_sve_ld1dd_le_r_mte,
5050 gen_helper_sve_ld2dd_le_r_mte,
5051 gen_helper_sve_ld3dd_le_r_mte,
5052 gen_helper_sve_ld4dd_le_r_mte } },
5053
5054 /* mte active, big-endian */
5055 { { gen_helper_sve_ld1bb_r_mte,
5056 gen_helper_sve_ld2bb_r_mte,
5057 gen_helper_sve_ld3bb_r_mte,
5058 gen_helper_sve_ld4bb_r_mte },
5059 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
5060 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
5061 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
5062
5063 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
5064 { gen_helper_sve_ld1hh_be_r_mte,
5065 gen_helper_sve_ld2hh_be_r_mte,
5066 gen_helper_sve_ld3hh_be_r_mte,
5067 gen_helper_sve_ld4hh_be_r_mte },
5068 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
5069 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
5070
5071 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
5072 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
5073 { gen_helper_sve_ld1ss_be_r_mte,
5074 gen_helper_sve_ld2ss_be_r_mte,
5075 gen_helper_sve_ld3ss_be_r_mte,
5076 gen_helper_sve_ld4ss_be_r_mte },
5077 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
5078
5079 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
5080 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
5081 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
5082 { gen_helper_sve_ld1dd_be_r_mte,
5083 gen_helper_sve_ld2dd_be_r_mte,
5084 gen_helper_sve_ld3dd_be_r_mte,
5085 gen_helper_sve_ld4dd_be_r_mte } } },
5086};
5087
c4e7c493
RH
/*
 * Emit a contiguous, predicated SVE load (LD1..LD4, scalar-plus-*).
 *
 * @zt:    first destination vector register number
 * @pg:    governing predicate register number
 * @addr:  pre-computed base address (guest virtual)
 * @dtype: combined memory/element type encoding (indexes dtype tables)
 * @nreg:  number of consecutive registers minus one (0 => LD1, 3 => LD4)
 *
 * The helper is selected from ldr_fns by MTE state, target endianness,
 * dtype, and register count, then dispatched through do_mem_zpa.
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];

    /*
     * While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    /* is_write == false: this path is loads only. */
    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
}
5101
3a7be554 5102static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
5103{
5104 if (a->rm == 31) {
5105 return false;
5106 }
5107 if (sve_access_check(s)) {
5108 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5109 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
5110 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5111 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5112 }
5113 return true;
5114}
5115
3a7be554 5116static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
5117{
5118 if (sve_access_check(s)) {
5119 int vsz = vec_full_reg_size(s);
5120 int elements = vsz >> dtype_esz[a->dtype];
5121 TCGv_i64 addr = new_tmp_a64(s);
5122
5123 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5124 (a->imm * elements * (a->nreg + 1))
5125 << dtype_msz(a->dtype));
5126 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5127 }
5128 return true;
5129}
e2654d75 5130
/*
 * LDFF1 (scalar plus scalar): contiguous first-fault load.
 *
 * The helper table is indexed by [mte_active][big_endian][dtype].
 * Unlike the plain-load tables there are no NULL holes: all 16 dtype
 * encodings are valid for first-fault loads.  Note rm == 31 is not
 * rejected here: for LDFF1 that encoding is legal (offset of XZR).
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        /* Address = Xn|SP + (Xm << msz); single register, load. */
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
5228
/*
 * LDNF1 (scalar plus immediate): contiguous non-fault load.
 *
 * The helper table is indexed by [mte_active][big_endian][dtype],
 * mirroring the LDFF1 table above; all 16 dtype encodings are valid.
 * (The second half of the table originally carried copy-pasted
 * "mte inactive" labels; corrected to "mte active" — the entries
 * are all _mte helpers.)
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        /* Immediate counts whole vectors of elements. */
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
1a039c7e 5329
/*
 * Common helper for LD1RQ (load and replicate quadword): load one
 * 16-byte quadword under predicate @pg, then replicate it across the
 * full vector register @zt.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers. */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /* The low 16 predicate bits sit at byte offset 6 of a BE uint64. */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        /* Park the narrowed predicate in the scratch slot and use that. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* nreg == 0 slot: single-register (LD1) helper for this dtype. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    /* simd_desc(16, 16, ...): run the helper as if VL were 16 bytes. */
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    tcg_temp_free_ptr(t_pg);

    /* Replicate that first quadword. */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}
5371
3a7be554 5372static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5373{
5374 if (a->rm == 31) {
5375 return false;
5376 }
5377 if (sve_access_check(s)) {
5378 int msz = dtype_msz(a->dtype);
5379 TCGv_i64 addr = new_tmp_a64(s);
5380 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5381 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5382 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5383 }
5384 return true;
5385}
5386
3a7be554 5387static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5388{
5389 if (sve_access_check(s)) {
5390 TCGv_i64 addr = new_tmp_a64(s);
5391 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5392 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5393 }
5394 return true;
5395}
5396
12c563f6
RH
/*
 * Common helper for LD1RO (load and replicate octaword, F64MM):
 * load one 32-byte octaword under predicate @pg and replicate it
 * across the full vector register @zt.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers. */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /* The low 32 predicate bits sit at byte offset 4 of a BE uint64. */
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        /* Park the narrowed predicate in the scratch slot and use that. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* nreg == 0 slot: single-register (LD1) helper for this dtype. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    /* simd_desc(32, 32, ...): run the helper as if VL were 32 bytes. */
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    /* vsz is now the ragged tail (vsz mod 32), zeroed if non-empty. */
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}
5459
5460static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5461{
5462 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5463 return false;
5464 }
5465 if (a->rm == 31) {
5466 return false;
5467 }
5468 if (sve_access_check(s)) {
5469 TCGv_i64 addr = new_tmp_a64(s);
5470 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5471 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5472 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5473 }
5474 return true;
5475}
5476
5477static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5478{
5479 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5480 return false;
5481 }
5482 if (sve_access_check(s)) {
5483 TCGv_i64 addr = new_tmp_a64(s);
5484 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5485 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5486 }
5487 return true;
5488}
5489
/*
 * Load and broadcast element (LD1R): load one element and replicate
 * it into every active element of zd; inactive elements are zeroed.
 * If the governing predicate is entirely false, no load occurs at all,
 * which is implemented by branching over the load to "over".
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        /* All relevant predicate bits zero: skip the load. */
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Predicate wider than 64 bits: search for any active element. */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        /* Negative result means no active element: skip the load. */
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    /* MTE check on the single element (is_write=false, one access). */
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
5541
1a039c7e
RH
/*
 * Emit a contiguous, predicated SVE store (ST1..ST4, scalar-plus-*).
 *
 * @zt:   first source vector register number
 * @pg:   governing predicate register number
 * @addr: pre-computed base address (guest virtual)
 * @msz:  log2 of the memory element size
 * @esz:  log2 of the vector element size
 * @nreg: register count minus one as encoded (0 => ST1, 3 => ST4)
 *
 * ST1 may truncate (msz < esz), so fn_single is indexed by both sizes:
 * [mte][be][msz][esz], with NULL for the impossible msz > esz cases.
 * ST2..ST4 require msz == esz, so fn_multiple is [mte][be][nreg-1][msz].
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    /* is_write == true: this path is stores only. */
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
5665
3a7be554 5666static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5667{
5668 if (a->rm == 31 || a->msz > a->esz) {
5669 return false;
5670 }
5671 if (sve_access_check(s)) {
5672 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5673 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5674 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5675 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5676 }
5677 return true;
5678}
5679
3a7be554 5680static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5681{
5682 if (a->msz > a->esz) {
5683 return false;
5684 }
5685 if (sve_access_check(s)) {
5686 int vsz = vec_full_reg_size(s);
5687 int elements = vsz >> a->esz;
5688 TCGv_i64 addr = new_tmp_a64(s);
5689
5690 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5691 (a->imm * elements * (a->nreg + 1)) << a->msz);
5692 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5693 }
5694 return true;
5695}
f6dbf62a
RH
5696
5697/*
5698 *** SVE gather loads / scatter stores
5699 */
5700
/*
 * Emit a gather load or scatter store.
 *
 * @zt:       data vector register
 * @pg:       governing predicate register
 * @zm:       offset vector register
 * @scale:    log2 scaling applied to the offsets (packed into the desc)
 * @scalar:   scalar base address
 * @msz:      log2 of the memory element size (for the MTE access size)
 * @is_write: true for scatter stores, false for gather loads
 * @fn:       helper implementing the access
 *
 * When MTE is active, the MTE descriptor fields are packed into the
 * upper bits of the simd_desc data (shifted by SVE_MTEDESC_SHIFT);
 * the low bits always carry the offset scale.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    int desc = 0;

    if (s->mte_active[0]) {
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        /* Each element access covers (1 << msz) bytes. */
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    }
    desc = simd_desc(vsz, vsz, desc | scale);

    /* Pass env-relative pointers to predicate, offsets, and data. */
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
}
5730
d28d12f0
RH
/*
 * Indexed by [mte][be][ff][xs][u][msz]:
 *   mte = MTE active, be = big-endian, ff = first-fault (ldff helpers),
 *   xs = offset form (0: zsu unsigned 32-bit offsets, 1: zss signed),
 *   u = zero-extending load (vs sign-extending), msz = log2 mem size.
 * NULL entries (u == 0, msz == 2) have no instruction encoding:
 * a 32-bit load into a 32-bit element needs no sign extension.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
5847
5848/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5849static gen_helper_gvec_mem_scatter * const
5850gather_load_fn64[2][2][2][3][2][4] = {
5851 { /* MTE Inactive */
5852 { /* Little-endian */
5853 { { { gen_helper_sve_ldbds_zsu,
5854 gen_helper_sve_ldhds_le_zsu,
5855 gen_helper_sve_ldsds_le_zsu,
5856 NULL, },
5857 { gen_helper_sve_ldbdu_zsu,
5858 gen_helper_sve_ldhdu_le_zsu,
5859 gen_helper_sve_ldsdu_le_zsu,
5860 gen_helper_sve_lddd_le_zsu, } },
5861 { { gen_helper_sve_ldbds_zss,
5862 gen_helper_sve_ldhds_le_zss,
5863 gen_helper_sve_ldsds_le_zss,
5864 NULL, },
5865 { gen_helper_sve_ldbdu_zss,
5866 gen_helper_sve_ldhdu_le_zss,
5867 gen_helper_sve_ldsdu_le_zss,
5868 gen_helper_sve_lddd_le_zss, } },
5869 { { gen_helper_sve_ldbds_zd,
5870 gen_helper_sve_ldhds_le_zd,
5871 gen_helper_sve_ldsds_le_zd,
5872 NULL, },
5873 { gen_helper_sve_ldbdu_zd,
5874 gen_helper_sve_ldhdu_le_zd,
5875 gen_helper_sve_ldsdu_le_zd,
5876 gen_helper_sve_lddd_le_zd, } } },
5877
5878 /* First-fault */
5879 { { { gen_helper_sve_ldffbds_zsu,
5880 gen_helper_sve_ldffhds_le_zsu,
5881 gen_helper_sve_ldffsds_le_zsu,
5882 NULL, },
5883 { gen_helper_sve_ldffbdu_zsu,
5884 gen_helper_sve_ldffhdu_le_zsu,
5885 gen_helper_sve_ldffsdu_le_zsu,
5886 gen_helper_sve_ldffdd_le_zsu, } },
5887 { { gen_helper_sve_ldffbds_zss,
5888 gen_helper_sve_ldffhds_le_zss,
5889 gen_helper_sve_ldffsds_le_zss,
5890 NULL, },
5891 { gen_helper_sve_ldffbdu_zss,
5892 gen_helper_sve_ldffhdu_le_zss,
5893 gen_helper_sve_ldffsdu_le_zss,
5894 gen_helper_sve_ldffdd_le_zss, } },
5895 { { gen_helper_sve_ldffbds_zd,
5896 gen_helper_sve_ldffhds_le_zd,
5897 gen_helper_sve_ldffsds_le_zd,
5898 NULL, },
5899 { gen_helper_sve_ldffbdu_zd,
5900 gen_helper_sve_ldffhdu_le_zd,
5901 gen_helper_sve_ldffsdu_le_zd,
5902 gen_helper_sve_ldffdd_le_zd, } } } },
5903 { /* Big-endian */
5904 { { { gen_helper_sve_ldbds_zsu,
5905 gen_helper_sve_ldhds_be_zsu,
5906 gen_helper_sve_ldsds_be_zsu,
5907 NULL, },
5908 { gen_helper_sve_ldbdu_zsu,
5909 gen_helper_sve_ldhdu_be_zsu,
5910 gen_helper_sve_ldsdu_be_zsu,
5911 gen_helper_sve_lddd_be_zsu, } },
5912 { { gen_helper_sve_ldbds_zss,
5913 gen_helper_sve_ldhds_be_zss,
5914 gen_helper_sve_ldsds_be_zss,
5915 NULL, },
5916 { gen_helper_sve_ldbdu_zss,
5917 gen_helper_sve_ldhdu_be_zss,
5918 gen_helper_sve_ldsdu_be_zss,
5919 gen_helper_sve_lddd_be_zss, } },
5920 { { gen_helper_sve_ldbds_zd,
5921 gen_helper_sve_ldhds_be_zd,
5922 gen_helper_sve_ldsds_be_zd,
5923 NULL, },
5924 { gen_helper_sve_ldbdu_zd,
5925 gen_helper_sve_ldhdu_be_zd,
5926 gen_helper_sve_ldsdu_be_zd,
5927 gen_helper_sve_lddd_be_zd, } } },
5928
5929 /* First-fault */
5930 { { { gen_helper_sve_ldffbds_zsu,
5931 gen_helper_sve_ldffhds_be_zsu,
5932 gen_helper_sve_ldffsds_be_zsu,
5933 NULL, },
5934 { gen_helper_sve_ldffbdu_zsu,
5935 gen_helper_sve_ldffhdu_be_zsu,
5936 gen_helper_sve_ldffsdu_be_zsu,
5937 gen_helper_sve_ldffdd_be_zsu, } },
5938 { { gen_helper_sve_ldffbds_zss,
5939 gen_helper_sve_ldffhds_be_zss,
5940 gen_helper_sve_ldffsds_be_zss,
5941 NULL, },
5942 { gen_helper_sve_ldffbdu_zss,
5943 gen_helper_sve_ldffhdu_be_zss,
5944 gen_helper_sve_ldffsdu_be_zss,
5945 gen_helper_sve_ldffdd_be_zss, } },
5946 { { gen_helper_sve_ldffbds_zd,
5947 gen_helper_sve_ldffhds_be_zd,
5948 gen_helper_sve_ldffsds_be_zd,
5949 NULL, },
5950 { gen_helper_sve_ldffbdu_zd,
5951 gen_helper_sve_ldffhdu_be_zd,
5952 gen_helper_sve_ldffsdu_be_zd,
5953 gen_helper_sve_ldffdd_be_zd, } } } } },
5954 { /* MTE Active */
5955 { /* Little-endian */
5956 { { { gen_helper_sve_ldbds_zsu_mte,
5957 gen_helper_sve_ldhds_le_zsu_mte,
5958 gen_helper_sve_ldsds_le_zsu_mte,
5959 NULL, },
5960 { gen_helper_sve_ldbdu_zsu_mte,
5961 gen_helper_sve_ldhdu_le_zsu_mte,
5962 gen_helper_sve_ldsdu_le_zsu_mte,
5963 gen_helper_sve_lddd_le_zsu_mte, } },
5964 { { gen_helper_sve_ldbds_zss_mte,
5965 gen_helper_sve_ldhds_le_zss_mte,
5966 gen_helper_sve_ldsds_le_zss_mte,
5967 NULL, },
5968 { gen_helper_sve_ldbdu_zss_mte,
5969 gen_helper_sve_ldhdu_le_zss_mte,
5970 gen_helper_sve_ldsdu_le_zss_mte,
5971 gen_helper_sve_lddd_le_zss_mte, } },
5972 { { gen_helper_sve_ldbds_zd_mte,
5973 gen_helper_sve_ldhds_le_zd_mte,
5974 gen_helper_sve_ldsds_le_zd_mte,
5975 NULL, },
5976 { gen_helper_sve_ldbdu_zd_mte,
5977 gen_helper_sve_ldhdu_le_zd_mte,
5978 gen_helper_sve_ldsdu_le_zd_mte,
5979 gen_helper_sve_lddd_le_zd_mte, } } },
5980
5981 /* First-fault */
5982 { { { gen_helper_sve_ldffbds_zsu_mte,
5983 gen_helper_sve_ldffhds_le_zsu_mte,
5984 gen_helper_sve_ldffsds_le_zsu_mte,
5985 NULL, },
5986 { gen_helper_sve_ldffbdu_zsu_mte,
5987 gen_helper_sve_ldffhdu_le_zsu_mte,
5988 gen_helper_sve_ldffsdu_le_zsu_mte,
5989 gen_helper_sve_ldffdd_le_zsu_mte, } },
5990 { { gen_helper_sve_ldffbds_zss_mte,
5991 gen_helper_sve_ldffhds_le_zss_mte,
5992 gen_helper_sve_ldffsds_le_zss_mte,
5993 NULL, },
5994 { gen_helper_sve_ldffbdu_zss_mte,
5995 gen_helper_sve_ldffhdu_le_zss_mte,
5996 gen_helper_sve_ldffsdu_le_zss_mte,
5997 gen_helper_sve_ldffdd_le_zss_mte, } },
5998 { { gen_helper_sve_ldffbds_zd_mte,
5999 gen_helper_sve_ldffhds_le_zd_mte,
6000 gen_helper_sve_ldffsds_le_zd_mte,
6001 NULL, },
6002 { gen_helper_sve_ldffbdu_zd_mte,
6003 gen_helper_sve_ldffhdu_le_zd_mte,
6004 gen_helper_sve_ldffsdu_le_zd_mte,
6005 gen_helper_sve_ldffdd_le_zd_mte, } } } },
6006 { /* Big-endian */
6007 { { { gen_helper_sve_ldbds_zsu_mte,
6008 gen_helper_sve_ldhds_be_zsu_mte,
6009 gen_helper_sve_ldsds_be_zsu_mte,
6010 NULL, },
6011 { gen_helper_sve_ldbdu_zsu_mte,
6012 gen_helper_sve_ldhdu_be_zsu_mte,
6013 gen_helper_sve_ldsdu_be_zsu_mte,
6014 gen_helper_sve_lddd_be_zsu_mte, } },
6015 { { gen_helper_sve_ldbds_zss_mte,
6016 gen_helper_sve_ldhds_be_zss_mte,
6017 gen_helper_sve_ldsds_be_zss_mte,
6018 NULL, },
6019 { gen_helper_sve_ldbdu_zss_mte,
6020 gen_helper_sve_ldhdu_be_zss_mte,
6021 gen_helper_sve_ldsdu_be_zss_mte,
6022 gen_helper_sve_lddd_be_zss_mte, } },
6023 { { gen_helper_sve_ldbds_zd_mte,
6024 gen_helper_sve_ldhds_be_zd_mte,
6025 gen_helper_sve_ldsds_be_zd_mte,
6026 NULL, },
6027 { gen_helper_sve_ldbdu_zd_mte,
6028 gen_helper_sve_ldhdu_be_zd_mte,
6029 gen_helper_sve_ldsdu_be_zd_mte,
6030 gen_helper_sve_lddd_be_zd_mte, } } },
6031
6032 /* First-fault */
6033 { { { gen_helper_sve_ldffbds_zsu_mte,
6034 gen_helper_sve_ldffhds_be_zsu_mte,
6035 gen_helper_sve_ldffsds_be_zsu_mte,
6036 NULL, },
6037 { gen_helper_sve_ldffbdu_zsu_mte,
6038 gen_helper_sve_ldffhdu_be_zsu_mte,
6039 gen_helper_sve_ldffsdu_be_zsu_mte,
6040 gen_helper_sve_ldffdd_be_zsu_mte, } },
6041 { { gen_helper_sve_ldffbds_zss_mte,
6042 gen_helper_sve_ldffhds_be_zss_mte,
6043 gen_helper_sve_ldffsds_be_zss_mte,
6044 NULL, },
6045 { gen_helper_sve_ldffbdu_zss_mte,
6046 gen_helper_sve_ldffhdu_be_zss_mte,
6047 gen_helper_sve_ldffsdu_be_zss_mte,
6048 gen_helper_sve_ldffdd_be_zss_mte, } },
6049 { { gen_helper_sve_ldffbds_zd_mte,
6050 gen_helper_sve_ldffhds_be_zd_mte,
6051 gen_helper_sve_ldffsds_be_zd_mte,
6052 NULL, },
6053 { gen_helper_sve_ldffbdu_zd_mte,
6054 gen_helper_sve_ldffhdu_be_zd_mte,
6055 gen_helper_sve_ldffsdu_be_zd_mte,
6056 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
6057};
6058
/*
 * Gather load, scalar base plus vector offsets (LD1 *_zprz).
 * The helper is chosen from the tables indexed as
 * [mte][be][first-fault][xs][unsigned][msz].
 */
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    }
    /* Decode constraints guarantee a valid esz/msz/u combination. */
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, false, fn);
    return true;
}
6083
/*
 * Gather load, vector base plus immediate offset (LD1 *_zpiz).
 * Invalid element/memory size combinations are rejected here.
 */
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        /* 32-bit elements use xs index 0 (32-bit offsets). */
        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        /* xs index 2 is overloaded to mean 64-bit offsets. */
        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
    return true;
}
6114
cf327449
SL
/*
 * SVE2 non-temporal gather load.  Always uses the non-first-fault
 * helper (ff index 0); vector base with scalar offset in Xm.
 */
static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz + !a->u) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
        break;
    case MO_64:
        /* xs index 2 is overloaded to mean 64-bit offsets. */
        fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, false, fn);
    return true;
}
6145
d28d12f0
RH
/* Indexed by [mte][be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            /* Byte stores have no endian variant. */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
6179
/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
6237
/*
 * Scatter store, scalar base plus vector offsets (ST1 *_zprz).
 * Helper chosen from scatter_store_fn{32,64}[mte][be][xs][msz].
 */
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    /* Reject msz > esz, and a scaled index with byte accesses. */
    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}
dec6cf6b 6264
/*
 * Scatter store, vector base plus immediate offset (ST1 *_zpiz).
 */
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        /* xs index 2 is overloaded to mean 64-bit offsets. */
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
    return true;
}
6295
6ebca45f
SL
/*
 * SVE2 non-temporal scatter store: vector base with scalar offset in Xm.
 */
static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        /* xs index 2 is overloaded to mean 64-bit offsets. */
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    default:
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, fn);
    return true;
}
6327
dec6cf6b
RH
6328/*
6329 * Prefetches
6330 */
6331
3a7be554 6332static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
6333{
6334 /* Prefetch is a nop within QEMU. */
2f95a3b0 6335 (void)sve_access_check(s);
dec6cf6b
RH
6336 return true;
6337}
6338
3a7be554 6339static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
6340{
6341 if (a->rm == 31) {
6342 return false;
6343 }
6344 /* Prefetch is a nop within QEMU. */
2f95a3b0 6345 (void)sve_access_check(s);
dec6cf6b
RH
6346 return true;
6347}
a2103582
RH
6348
6349/*
6350 * Move Prefix
6351 *
6352 * TODO: The implementation so far could handle predicated merging movprfx.
6353 * The helper functions as written take an extra source register to
6354 * use in the operation, but the result is only written when predication
6355 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
6356 * to allow the final write back to the destination to be unconditional.
6357 * For predicated zeroing movprfx, we need to rearrange the helpers to
6358 * allow the final write back to zero inactives.
6359 *
6360 * In the meantime, just emit the moves.
6361 */
6362
/* Unpredicated MOVPRFX: emitted as a plain vector move. */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}
6367
3a7be554 6368static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
6369{
6370 if (sve_access_check(s)) {
6371 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
6372 }
6373 return true;
6374}
6375
/* Predicated zeroing MOVPRFX: move with inactive elements zeroed. */
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
5dad1ba5
RH

/*
 * SVE2 Integer Multiply - Unpredicated
 */

/* MUL (vectors): expanded inline via the generic gvec multiply. */
TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)

/* SMULH: signed multiply returning the high half, per element size. */
static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

/* UMULH: unsigned counterpart of SMULH. */
static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

/* PMUL: only the byte helper exists for this encoding. */
TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6417
d4b1e59d
RH
/*
 * SVE2 Integer - Predicated
 */

/* SADALP: NULL for the byte size — smallest form is halfword. */
static gen_helper_gvec_4 * const sadlp_fns[4] = {
    NULL, gen_helper_sve2_sadalp_zpzz_h,
    gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
};
TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           sadlp_fns[a->esz], a, 0)

/* UADALP: unsigned counterpart of SADALP. */
static gen_helper_gvec_4 * const uadlp_fns[4] = {
    NULL, gen_helper_sve2_uadalp_zpzz_h,
    gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
};
TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           uadlp_fns[a->esz], a, 0)

/*
 * SVE2 integer unary operations (predicated)
 */

/* URECPE/URSQRTE: only the 32-bit element size is valid. */
TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)

static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
45d9503d 6457
5880bdc0
RH
/*
 * Predicated two-operand SVE2 operations, all expanded via the
 * DO_ZPZZ macro (defined earlier in this file, not visible here).
 */
DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
0ce1dda8
RH

/*
 * SVE2 Widening Integer Arithmetic
 *
 * All tables have NULL at index 0: the destination element is twice
 * the source width, so the smallest result size is halfword.  The
 * integer data argument selects bottom/top halves of the operands
 * (0 = B/B, 1 = T/B, 2 = B/T, 3 = T/T, matching the B/T suffixes).
 */

static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL, gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL, gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL, gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL, gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL, gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL, gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL, gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL, gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL, gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

/* EORBT/EORTB share helpers; all four element sizes are valid. */
static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
2df3ca55 6584
e3a56131
RH
/*
 * PMULLB/PMULLT expansion.  The 128-bit result form (esz == 0)
 * additionally requires the sve2_pmull128 feature; esz == 2 has
 * no helper (NULL) and is rejected by gen_gvec_ool_arg_zzz.
 */
static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL, gen_helper_sve2_pmull_d,
    };
    if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}
6596
615f19fe
RH
/* sel = false selects bottom halves, true selects top halves. */
TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)

/*
 * Add/subtract wide: second operand is narrow; data argument
 * selects its bottom (0) or top (1) half.  No byte form.
 */
static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL, gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL, gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL, gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL, gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
4269fef1
RH
6627
/*
 * SSHLL[BT] vector expansion.  imm packs (shl << 1) | top.
 * Sign-extend the selected half of each element, then shift left.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            /* Shift by exactly halfbits: just mask the high halves. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Arithmetic shift right then left: sign-extends the top half. */
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        /* Bottom half: shift up, then arithmetic shift back down. */
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}
6649
/*
 * USHLL[BT] on a 64-bit lane: zero-extend the selected half of each
 * element and shift left.  A single shift plus mask suffices because
 * the extension is unsigned.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    /* Net shift: +shl for bottom halves, shl - halfbits for top. */
    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}
6670
/* Per-vece wrappers matching the GVecGen2i .fni8 signature. */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
6685
/*
 * USHLL[BT] vector expansion: unsigned analogue of gen_sshll_vec.
 * Uses logical shifts; the shl == 0 / shl == halfbits corner cases
 * reduce to a plain mask of the selected halves.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
6714
/*
 * Common expansion for [SU]SHLL[BT]: widening shift left long.
 * 'sel' picks bottom/top halves; 'uns' picks unsigned vs signed.
 * The immediate passed to the gvec op packs (imm << 1) | sel,
 * decoded again by the gen_*shll_* callbacks above.
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}
6766
/* The four [SU]SHLL[BT] entry points: (sel, uns) selectors. */
static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}

static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}

static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}

static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
cb9c33b8 6786
615f19fe
RH
/* Bit permute group: gated on the separate sve2_bitperm feature. */
static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bgrp_fns[a->esz], a, 0)

/* Complex add: data argument 0 selects rot90, 1 selects rot270. */
static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)

/* Absolute difference and accumulate long; B/T selected via data. */
static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL, gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL, gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
b8295dfb
RH
6839
/* ADCLB/ADCLT (and the subtracting forms) share this expansion. */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}
6852
eeb4e84d
RH
/* sel = false selects the bottom halves, true the top halves. */
TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e
RH
6855
/*
 * Common expansion for SVE2 two-operand + immediate gvec operations.
 * Rejects an invalid element size from decode, then expands via the
 * supplied GVecGen2iFn.
 */
static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
        unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
        fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
    }
    return true;
}
6869
/* Shift-right-accumulate and shift-insert, via shared AdvSIMD expanders. */
static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ssra);
}

static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_usra);
}

static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_srsra);
}

static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ursra);
}

static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sri);
}

static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sli);
}
289a1797
RH
6899
/* Gate a three-operand gvec expansion on the SVE2 feature. */
static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return gen_gvec_fn_arg_zzz(s, fn, a);
}
6907
/* Absolute difference and accumulate, via shared AdvSIMD expanders. */
static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_saba);
}

static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
}
5ff2838d
RH
6917
/*
 * Common expansion for SVE2 narrowing extractions (e.g. SQXTN[BT]):
 * esz here is the *narrow* element size, so MO_64 sources narrow to
 * at most MO_32 results and esz > MO_32 is rejected; imm must be 0.
 */
static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
                                   const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
        !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}
6933
/* Host vector ops required by the gen_sqxtn[bt]_vec expansions. */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};
6937
/*
 * SQXTNB vector expansion: saturate each wide element to the signed
 * range of the narrow type, then keep only the low halves.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    /* Clamp to [min, max] via max-then-min. */
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    /* Discard the high halves of each element. */
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}
6954
6955static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
6956{
6957 static const GVecGen2 ops[3] = {
6958 { .fniv = gen_sqxtnb_vec,
6959 .opt_opc = sqxtn_list,
6960 .fno = gen_helper_sve2_sqxtnb_h,
6961 .vece = MO_16 },
6962 { .fniv = gen_sqxtnb_vec,
6963 .opt_opc = sqxtn_list,
6964 .fno = gen_helper_sve2_sqxtnb_s,
6965 .vece = MO_32 },
6966 { .fniv = gen_sqxtnb_vec,
6967 .opt_opc = sqxtn_list,
6968 .fno = gen_helper_sve2_sqxtnb_d,
6969 .vece = MO_64 },
6970 };
6971 return do_sve2_narrow_extract(s, a, ops);
6972}
6973
6974static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6975{
6976 TCGv_vec t = tcg_temp_new_vec_matching(d);
6977 int halfbits = 4 << vece;
6978 int64_t mask = (1ull << halfbits) - 1;
6979 int64_t min = -1ull << (halfbits - 1);
6980 int64_t max = -min - 1;
6981
6982 tcg_gen_dupi_vec(vece, t, min);
6983 tcg_gen_smax_vec(vece, n, n, t);
6984 tcg_gen_dupi_vec(vece, t, max);
6985 tcg_gen_smin_vec(vece, n, n, t);
6986 tcg_gen_shli_vec(vece, n, n, halfbits);
6987 tcg_gen_dupi_vec(vece, t, mask);
6988 tcg_gen_bitsel_vec(vece, d, t, d, n);
6989 tcg_temp_free_vec(t);
6990}
6991
6992static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
6993{
6994 static const GVecGen2 ops[3] = {
6995 { .fniv = gen_sqxtnt_vec,
6996 .opt_opc = sqxtn_list,
6997 .load_dest = true,
6998 .fno = gen_helper_sve2_sqxtnt_h,
6999 .vece = MO_16 },
7000 { .fniv = gen_sqxtnt_vec,
7001 .opt_opc = sqxtn_list,
7002 .load_dest = true,
7003 .fno = gen_helper_sve2_sqxtnt_s,
7004 .vece = MO_32 },
7005 { .fniv = gen_sqxtnt_vec,
7006 .opt_opc = sqxtn_list,
7007 .load_dest = true,
7008 .fno = gen_helper_sve2_sqxtnt_d,
7009 .vece = MO_64 },
7010 };
7011 return do_sve2_narrow_extract(s, a, ops);
7012}
7013
/* Vector opcodes required by the inline unsigned saturating narrows. */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

/*
 * UQXTNB (bottom): unsigned saturate to the half-width maximum.
 * Since the result of umin already fits in the low half, no
 * explicit masking is required.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}

/*
 * UQXTNT (top): as UQXTNB, shifted into the high half of each
 * container and merged with the existing low half of d.  Clobbers n.
 */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* t still holds the low-half mask: keep d's low half, n's high. */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
/* Vector opcodes required by the signed-to-unsigned saturating narrows. */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTUNB (bottom): signed input saturated to the *unsigned*
 * half-width range [0, max]: clamp negatives to zero with smax,
 * then bound above with umin.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}

/*
 * SQXTUNT (top): as SQXTUNB, shifted into the high half of each
 * container and merged with the preserved low half of d.  Clobbers n.
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* t holds the low-half mask: keep d's low half, take n's high. */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
/*
 * Expand an SVE2 narrowing right-shift through a GVecGen2i table
 * indexed by a->esz.  Only esz values 0..MO_32 have table entries.
 * The shift amount is expected to already be constrained to
 * 1..(narrow element width) — presumably by the decode — so it is
 * asserted rather than re-checked.
 */
static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
                               const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
/*
 * SHRNB (bottom): shift each wide element right and keep only the
 * low half of each container.  The i64 form processes a whole
 * 64-bit lane at once, using a replicated mask for the element size.
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

/* Per-size i64 wrappers matching the GVecGen2i.fni8 signature. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

/* Vector form of the same operation.  Clobbers n. */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnb16_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnb32_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnb64_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/*
 * SHRNT (top): place the shifted result in the high half of each
 * container, preserving the low half already in d.  Shifting left
 * by (halfbits - shr) both performs the right shift and moves the
 * kept bits into the high half in one operation.  Clobbers n.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

/* Per-size i64 wrappers matching the GVecGen2i.fni8 signature. */
static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/* 64-bit elements: a plain shift + deposit into bits [63:32]. */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

/* Vector form: shift into the high half, then merge with d's low half. */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnt16_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnt32_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnt64_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/*
 * Rounding narrowing shifts have no inline vector expansion; the
 * table entries supply only the out-of-line helper.
 */
static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnb_h },
        { .fno = gen_helper_sve2_rshrnb_s },
        { .fno = gen_helper_sve2_rshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnt_h },
        { .fno = gen_helper_sve2_rshrnt_s },
        { .fno = gen_helper_sve2_rshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/*
 * SQSHRUNB (bottom): arithmetic shift right, then saturate the
 * signed result to the unsigned half-width range [0, max].
 * Clobbers n.
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/*
 * SQSHRUNT (top): as SQSHRUNB, with the saturated result shifted
 * into the high half of each container and merged with the
 * preserved low half of d.  Clobbers n.
 */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* t holds the low-half mask: keep d's low half, take n's high. */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/* Rounding forms: out-of-line helpers only, no inline expansion. */
static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunb_h },
        { .fno = gen_helper_sve2_sqrshrunb_s },
        { .fno = gen_helper_sve2_sqrshrunb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunt_h },
        { .fno = gen_helper_sve2_sqrshrunt_s },
        { .fno = gen_helper_sve2_sqrshrunt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/*
 * SQSHRNB (bottom): arithmetic shift right, saturate to the signed
 * half-width range [min, max], then mask to the low half of each
 * container.  Clobbers n.
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/*
 * SQSHRNT (top): as SQSHRNB, with the saturated result shifted into
 * the high half of each container and merged with the preserved low
 * half of d.  Clobbers n.
 */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* Select low half from d, high half from the shifted result. */
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/* Rounding forms: out-of-line helpers only, no inline expansion. */
static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnb_h },
        { .fno = gen_helper_sve2_sqrshrnb_s },
        { .fno = gen_helper_sve2_sqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnt_h },
        { .fno = gen_helper_sve2_sqrshrnt_s },
        { .fno = gen_helper_sve2_sqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/*
 * UQSHRNB (bottom): logical shift right, then unsigned saturate to
 * the half-width maximum.  The umin result already fits in the low
 * half, so no masking is needed.  Clobbers n.
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/*
 * UQSHRNT (top): as UQSHRNB, with the saturated result shifted into
 * the high half of each container and merged with the preserved low
 * half of d.  Clobbers n.
 */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* t holds the low-half mask: keep d's low half, take n's high. */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/* Rounding forms: out-of-line helpers only, no inline expansion. */
static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnb_h },
        { .fno = gen_helper_sve2_uqrshrnb_s },
        { .fno = gen_helper_sve2_uqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnt_h },
        { .fno = gen_helper_sve2_uqrshrnt_s },
        { .fno = gen_helper_sve2_uqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
/*
 * SVE2 narrowing add/subtract-high insns.  Each NAME expands via
 * gen_gvec_ool_arg_zzz with a helper chosen by element size; there
 * is no byte-sized form (fns[0] == NULL), so esz == 0 is rejected.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                  \
    static gen_helper_gvec_3 * const name##_fns[4] = {                  \
        NULL,                       gen_helper_sve2_##name##_h,         \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,         \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                   \
               name##_fns[a->esz], a, 0)

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
/* Guard do_ppzz_flags with the SVE2 feature test. */
static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                               gen_helper_gvec_flags_4 *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_ppzz_flags(s, a, fn);
}

/*
 * SVE2 character-match insns, setting flags.  Only byte and
 * halfword element sizes exist (fns[2..3] == NULL).
 */
#define DO_SVE2_PPZZ_MATCH(NAME, name)                                      \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_flags_4 * const fns[4] = {                       \
        gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h,   \
        NULL,                            NULL                               \
    };                                                                      \
    return do_sve2_ppzz_flags(s, a, fns[a->esz]);                           \
}

DO_SVE2_PPZZ_MATCH(MATCH, match)
DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
/* HISTCNT: only word and doubleword element sizes are defined. */
static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
           histcnt_fns[a->esz], a, 0)

/* HISTSEG: byte elements only. */
TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
           a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
/* Guard do_zpzz_fp with the SVE2 feature test. */
static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                            gen_helper_gvec_4_ptr *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzz_fp(s, a, fn);
}

/*
 * SVE2 floating-point pairwise insns, predicated.  No byte-sized
 * form exists (fns[0] == NULL).
 */
#define DO_SVE2_ZPZZ_FP(NAME, name)                                         \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_4_ptr * const fns[4] = {                         \
        NULL,                            gen_helper_sve2_##name##_zpzz_h,   \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d    \
    };                                                                      \
    return do_sve2_zpzz_fp(s, a, fns[a->esz]);                              \
}

DO_SVE2_ZPZZ_FP(FADDP, faddp)
DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
DO_SVE2_ZPZZ_FP(FMINP, fminp)

/*
 * SVE Integer Multiply-Add (unpredicated)
 */
/*
 * FMMLA: floating-point matrix multiply-accumulate.  The single- and
 * double-precision forms are gated by separate features (F32MM and
 * F64MM respectively); all other element sizes are undefined.
 */
static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    gen_helper_gvec_4_ptr *fn;

    switch (a->esz) {
    case MO_32:
        if (!dc_isar_feature(aa64_sve_f32mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_s;
        break;
    case MO_64:
        if (!dc_isar_feature(aa64_sve_f64mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_d;
        break;
    default:
        return false;
    }

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 * SVE2 widening multiply-add insns.  Each table is indexed by the
 * (wide) element size; there is no byte form except for the
 * non-widening SQRDMLAH/SQRDMLSH.  The final integer data argument
 * distinguishes the bottom/top (and bottom-then-top) variants.
 */
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)
/* CMLA: complex multiply-add; a->rot carries the rotation encoding. */
static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* CDOT: complex dot product; only word/doubleword accumulators. */
static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* SQRDCMLAH: saturating rounding doubling complex multiply-add. */
static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* USDOT is only defined for the word accumulator size (esz == 2). */
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)

/* AESMC/AESIMC: a->decrypt selects the inverse transform. */
TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
           gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)

/* AESE/AESD share one helper; the final flag selects decryption. */
TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, false)
TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, true)

TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4ekey, a, 0)
/* RAX1: rotate-and-xor (SHA3); always operates on 64-bit elements. */
static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2_sha3, s)) {
        return false;
    }
    return gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
}
/*
 * SVE2 floating-point convert, narrowing (FCVTNT) and widening
 * (FCVTLT) forms, plus the round-to-odd converts (FCVTX).  Each is
 * a feature check followed by the shared predicated-unary expander.
 */

static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
}

/* BFCVTNT is gated by the BF16 feature, not SVE2. */
static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
}

static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds)
;
}

static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
}

static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
}

/* FCVTX/FCVTXNT: double-to-single conversion with round-to-odd. */
static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
}
/*
 * FLOGB: floating-point base-2 logarithm (integer result).
 * No byte form; the half-precision form uses the FP16 status.
 */
static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        NULL,               gen_helper_flogb_h,
        gen_helper_flogb_s, gen_helper_flogb_d
    };

    if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 * FMLAL/FMLSL (vectors): half-to-single widening multiply-add.
 * The helper's data argument packs (sel << 1) | sub, selecting the
 * top/bottom source halves and add/subtract respectively.
 */
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           cpu_env, vsz, vsz, (sel << 1) | sub,
                           gen_helper_sve2_fmlal_zzzw_s);
    }
    return true;
}

static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, false);
}

static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, true);
}

static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, false);
}

static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, true);
}
7956
7957static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
7958{
7959 if (!dc_isar_feature(aa64_sve2, s)) {
7960 return false;
7961 }
7962 if (sve_access_check(s)) {
7963 unsigned vsz = vec_full_reg_size(s);
7964 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
7965 vec_full_reg_offset(s, a->rn),
7966 vec_full_reg_offset(s, a->rm),
7967 vec_full_reg_offset(s, a->ra),
7968 cpu_env, vsz, vsz,
7969 (a->index << 2) | (sel << 1) | sub,
7970 gen_helper_sve2_fmlal_zzxw_s);
7971 }
7972 return true;
7973}
7974
/* FMLALB (indexed): widening multiply-add, bottom halves. */
static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, false);
}
7979
/* FMLALT (indexed): widening multiply-add, top halves. */
static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, true);
}
7984
/* FMLSLB (indexed): widening multiply-subtract, bottom halves. */
static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, false);
}
7989
/* FMLSLT (indexed): widening multiply-subtract, top halves. */
static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, true);
}
2323c5ff 7994
eec05e4e
RH
/*
 * I8MM 8-bit integer matrix multiply-accumulate:
 * signed, mixed signed/unsigned, and unsigned variants,
 * all expanded via the out-of-line zzzz helper path.
 */
TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_ummla_b, a, 0)
cb8657f7 8001
eec05e4e
RH
/* BFDOT (vectors): bfloat16 dot product, gated on the BF16 feature. */
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
f3500a25
RH
/* BFDOT (indexed): bfloat16 dot product with an indexed element. */
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)
81266a1f 8006
eec05e4e
RH
/* BFMMLA: bfloat16 matrix multiply-accumulate into fp32. */
TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfmmla, a, 0)
5693887f
RH
8009
8010static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8011{
8012 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8013 return false;
8014 }
8015 if (sve_access_check(s)) {
8016 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8017 unsigned vsz = vec_full_reg_size(s);
8018
8019 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8020 vec_full_reg_offset(s, a->rn),
8021 vec_full_reg_offset(s, a->rm),
8022 vec_full_reg_offset(s, a->ra),
8023 status, vsz, vsz, sel,
8024 gen_helper_gvec_bfmlal);
8025 tcg_temp_free_ptr(status);
8026 }
8027 return true;
8028}
8029
/* BFMLALB (vectors): widening multiply-add, bottom halves. */
static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, false);
}
8034
/* BFMLALT (vectors): widening multiply-add, top halves. */
static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, true);
}
458d0ab6
RH
8039
8040static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
8041{
8042 if (!dc_isar_feature(aa64_sve_bf16, s)) {
8043 return false;
8044 }
8045 if (sve_access_check(s)) {
8046 TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8047 unsigned vsz = vec_full_reg_size(s);
8048
8049 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8050 vec_full_reg_offset(s, a->rn),
8051 vec_full_reg_offset(s, a->rm),
8052 vec_full_reg_offset(s, a->ra),
8053 status, vsz, vsz, (a->index << 1) | sel,
8054 gen_helper_gvec_bfmlal_idx);
8055 tcg_temp_free_ptr(status);
8056 }
8057 return true;
8058}
8059
/* BFMLALB (indexed): widening multiply-add, bottom halves. */
static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, false);
}
8064
/* BFMLALT (indexed): widening multiply-add, top halves. */
static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, true);
}