]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Use TRANS_FEAT for do_frint_mode
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
49/*
50 * Helpers for extracting complex instruction fields.
51 */
52
53/* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
451e4ffd 56static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
57{
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60}
61
451e4ffd 62static int tszimm_shr(DisasContext *s, int x)
ccd841c3 63{
451e4ffd 64 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
65}
66
67/* See e.g. LSL (immediate, predicated). */
451e4ffd 68static int tszimm_shl(DisasContext *s, int x)
ccd841c3 69{
451e4ffd 70 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
71}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
c4e7c493
RH
84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
86 */
451e4ffd 87static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
88{
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
91}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.
 * (One predicate bit per byte of vector; sve_len is the vector length
 * in bytes, so the predicate is an eighth of that.)
 */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
116
516e246a
RH
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    /* Minimum 8 bytes, otherwise round up to a multiple of 16. */
    return size <= 8 ? 8 : (size + 15) & ~15;
}
133
/* Return the predicate register size rounded up for use with gvec ops. */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
138
40e32e5a 139/* Invoke an out-of-line helper on 2 Zregs. */
c5edf07d 140static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
40e32e5a
RH
141 int rd, int rn, int data)
142{
c5edf07d
RH
143 if (fn == NULL) {
144 return false;
145 }
146 if (sve_access_check(s)) {
147 unsigned vsz = vec_full_reg_size(s);
148 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
149 vec_full_reg_offset(s, rn),
150 vsz, vsz, data, fn);
151 }
152 return true;
40e32e5a
RH
153}
154
de58c6b0
RH
155static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
156 int rd, int rn, int data,
157 ARMFPStatusFlavour flavour)
158{
159 if (fn == NULL) {
160 return false;
161 }
162 if (sve_access_check(s)) {
163 unsigned vsz = vec_full_reg_size(s);
164 TCGv_ptr status = fpstatus_ptr(flavour);
165
166 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
167 vec_full_reg_offset(s, rn),
168 status, vsz, vsz, data, fn);
169 tcg_temp_free_ptr(status);
170 }
171 return true;
172}
173
174static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
175 arg_rr_esz *a, int data)
176{
177 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
178 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
179}
180
e645d1a1 181/* Invoke an out-of-line helper on 3 Zregs. */
913a8a00 182static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
e645d1a1
RH
183 int rd, int rn, int rm, int data)
184{
913a8a00
RH
185 if (fn == NULL) {
186 return false;
187 }
188 if (sve_access_check(s)) {
189 unsigned vsz = vec_full_reg_size(s);
190 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
191 vec_full_reg_offset(s, rn),
192 vec_full_reg_offset(s, rm),
193 vsz, vsz, data, fn);
194 }
195 return true;
e645d1a1
RH
196}
197
84a272f5
RH
198static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
199 arg_rrr_esz *a, int data)
200{
201 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
202}
203
532724e4
RH
/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    /* NULL fn means unallocated encoding; let the caller diagnose. */
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}

/* As above, choosing FP16 vs normal float_status by element size. */
static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}
232
38650638 233/* Invoke an out-of-line helper on 4 Zregs. */
7ad416b1 234static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
38650638
RH
235 int rd, int rn, int rm, int ra, int data)
236{
7ad416b1
RH
237 if (fn == NULL) {
238 return false;
239 }
240 if (sve_access_check(s)) {
241 unsigned vsz = vec_full_reg_size(s);
242 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
243 vec_full_reg_offset(s, rn),
244 vec_full_reg_offset(s, rm),
245 vec_full_reg_offset(s, ra),
246 vsz, vsz, data, fn);
247 }
248 return true;
38650638
RH
249}
250
cab79ac9
RH
251static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
252 arg_rrrr_esz *a, int data)
253{
254 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
255}
256
e82d3536
RH
257static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
258 arg_rrxr_esz *a)
259{
260 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
261}
262
41bf9b67
RH
/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}

/* As above, with the pointer being a float_status for the given flavour.
 * The temporary is freed here regardless of whether the access check
 * allowed any code to be emitted.
 */
static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    tcg_temp_free_ptr(status);
    return ret;
}
291
96a461f7 292/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
8fb27a21 293static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
96a461f7
RH
294 int rd, int rn, int pg, int data)
295{
8fb27a21
RH
296 if (fn == NULL) {
297 return false;
298 }
299 if (sve_access_check(s)) {
300 unsigned vsz = vec_full_reg_size(s);
301 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
302 vec_full_reg_offset(s, rn),
303 pred_full_reg_offset(s, pg),
304 vsz, vsz, data, fn);
305 }
306 return true;
96a461f7
RH
307}
308
b051809a
RH
309static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
310 arg_rpr_esz *a, int data)
311{
312 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
313}
314
afa2529c
RH
315static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
316 arg_rpri_esz *a)
317{
318 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
319}
b051809a 320
0360730c
RH
/* Invoke an out-of-line helper on 2 Zregs and a predicate,
 * plus a float_status pointer of the given flavour.
 */
static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

/* As above, register numbers from decoded arguments. */
static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}
347
36cbb7a8 348/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
2a753d1e 349static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
36cbb7a8
RH
350 int rd, int rn, int rm, int pg, int data)
351{
2a753d1e
RH
352 if (fn == NULL) {
353 return false;
354 }
355 if (sve_access_check(s)) {
356 unsigned vsz = vec_full_reg_size(s);
357 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
358 vec_full_reg_offset(s, rn),
359 vec_full_reg_offset(s, rm),
360 pred_full_reg_offset(s, pg),
361 vsz, vsz, data, fn);
362 }
363 return true;
36cbb7a8 364}
f7d79c41 365
312016c9
RH
366static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
367 arg_rprr_esz *a, int data)
368{
369 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
370}
371
faf915e2
RH
/* Invoke a vector expander on two Zregs and an immediate. */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}

/* As above, register numbers and immediate from decoded arguments. */
static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}
396
39eea561 397/* Invoke a vector expander on three Zregs. */
50f6db5f 398static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
28c4da31 399 int esz, int rd, int rn, int rm)
38388f7e 400{
50f6db5f
RH
401 if (gvec_fn == NULL) {
402 return false;
403 }
404 if (sve_access_check(s)) {
405 unsigned vsz = vec_full_reg_size(s);
406 gvec_fn(esz, vec_full_reg_offset(s, rd),
407 vec_full_reg_offset(s, rn),
408 vec_full_reg_offset(s, rm), vsz, vsz);
409 }
410 return true;
38388f7e
RH
411}
412
cd54bbe6
RH
413static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
414 arg_rrr_esz *a)
415{
416 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
417}
418
/* Invoke a vector expander on four Zregs. */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}
435
39eea561
RH
436/* Invoke a vector move on two Zregs. */
437static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 438{
f7d79c41 439 if (sve_access_check(s)) {
5f730621
RH
440 unsigned vsz = vec_full_reg_size(s);
441 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
442 vec_full_reg_offset(s, rn), vsz, vsz);
f7d79c41
RH
443 }
444 return true;
38388f7e
RH
445}
446
d9d78dcc
RH
/* Initialize a Zreg with replications of a 64-bit immediate.
 * Caller is responsible for any required sve_access_check.
 */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}
453
516e246a 454/* Invoke a vector expander on three Pregs. */
23e5fa5f 455static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
dd81a8d7 456 int rd, int rn, int rm)
516e246a 457{
23e5fa5f
RH
458 if (sve_access_check(s)) {
459 unsigned psz = pred_gvec_reg_size(s);
460 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
461 pred_full_reg_offset(s, rn),
462 pred_full_reg_offset(s, rm), psz, psz);
463 }
464 return true;
516e246a
RH
465}
466
467/* Invoke a vector move on two Pregs. */
468static bool do_mov_p(DisasContext *s, int rd, int rn)
469{
d0b2df5a
RH
470 if (sve_access_check(s)) {
471 unsigned psz = pred_gvec_reg_size(s);
472 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
473 pred_full_reg_offset(s, rn), psz, psz);
474 }
475 return true;
516e246a
RH
476}
477
9e18d7a6
RH
/* Set the cpu flags as per a return from an SVE helper.
 * The helper packs NZCV-style results into the low bits of t:
 * bit 1 -> !Z (any active element), bit 0 -> C (no last element).
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
496
/* PredTest over a full predicate register: "words" 64-bit words at
 * env offsets dofs (value) and gofs (governing predicate).
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    /* The pointer temps are dead once the helper call is emitted. */
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
513
028e2a7b
RH
/* For each element size, the bits within a predicate word that are active:
 * esz 0 (byte) uses every bit, esz 1 (half) every 2nd, esz 2 (word)
 * every 4th, esz 3 (dword) every 8th.
 */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
519
c437c59b
RH
/* Explicitly unallocated encoding; raise the usual exception. */
static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}
525
39eea561
RH
526/*
527 *** SVE Logical - Unpredicated Group
528 */
529
b262215b
RH
530TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
531TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
532TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
533TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
d1822297 534
e6eba6e5
RH
/* XOR then rotate-right each byte of a 64-bit lane, using shifts and
 * masks since there is no per-byte rotate primitive.
 */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);   /* keep the bits shifted down in-lane */
    tcg_gen_andi_i64(t, t, ~mask);  /* keep the bits rotated up in-lane */
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

/* As above, for 16-bit lanes. */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

/* 32- and 64-bit lanes have native rotate support. */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

/* Vector form, for hosts with rotli_vec support. */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
581
/* Expand XAR (XOR and rotate right by immediate) over a full vector,
 * dispatching to the per-element-size implementations above.
 * Shared with the AdvSIMD expander, hence the external linkage.
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
624
625static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
626{
627 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
628 return false;
629 }
630 if (sve_access_check(s)) {
631 unsigned vsz = vec_full_reg_size(s);
632 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
633 vec_full_reg_offset(s, a->rn),
634 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
635 }
636 return true;
637}
638
911cdc6d
RH
/* EOR3: d = n ^ m ^ k, element size irrelevant (pure bitwise). */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)
911cdc6d
RH
666
/* BCAX: d = n ^ (m & ~k), bit clear and exclusive or. */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)
911cdc6d
RH
694
/* BSL: bitwise select with the selector in the destination register. */
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
911cdc6d
RH
703
/* BSL1N: d = (~n & k) | (m & ~k) -- bitsel with the first input inverted.
 * Note the i64 and fallback vector paths clobber n and m, which gvec
 * permits for .fni8/.fniv implementations.
 */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
911cdc6d
RH
738
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
911cdc6d
RH
782
/* NBSL: inverted bitwise select, d = ~((n & k) | (m & ~k)). */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
911cdc6d 811
fea98f9c
RH
812/*
813 *** SVE Integer Arithmetic - Unpredicated Group
814 */
815
b262215b
RH
816TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
817TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
818TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
819TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
820TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
821TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
fea98f9c 822
f97cfd59
RH
823/*
824 *** SVE Integer Arithmetic - Binary Predicated Group
825 */
826
a2103582
RH
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
838
8e7acb24
RH
/* Declare the per-esz helper table for a predicated 3-operand op and
 * emit its trans_* function via TRANS_FEAT.
 */
#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \
    }; \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \
               name##_zpzz_fns[a->esz], a, 0)
846
847DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
848DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
849DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
850DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)
851
852DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
853DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)
854
855DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
856DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
857DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
858DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
859DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
860DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)
861
862DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
863DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
864DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)
865
866DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
867DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
868DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)
869
870static gen_helper_gvec_4 * const sdiv_fns[4] = {
871 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
872};
873TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)
f97cfd59 874
8e7acb24
RH
875static gen_helper_gvec_4 * const udiv_fns[4] = {
876 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
877};
878TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)
f97cfd59 879
29693f5f 880TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
d3fe4a29 881
afac6d04
RH
882/*
883 *** SVE Integer Arithmetic - Unary Predicated Group
884 */
885
817bd5c9
RH
886#define DO_ZPZ(NAME, FEAT, name) \
887 static gen_helper_gvec_3 * const name##_fns[4] = { \
888 gen_helper_##name##_b, gen_helper_##name##_h, \
889 gen_helper_##name##_s, gen_helper_##name##_d, \
afac6d04 890 }; \
817bd5c9
RH
891 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)
892
893DO_ZPZ(CLS, aa64_sve, sve_cls)
894DO_ZPZ(CLZ, aa64_sve, sve_clz)
895DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
896DO_ZPZ(CNOT, aa64_sve, sve_cnot)
897DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
898DO_ZPZ(ABS, aa64_sve, sve_abs)
899DO_ZPZ(NEG, aa64_sve, sve_neg)
900DO_ZPZ(RBIT, aa64_sve, sve_rbit)
901
902static gen_helper_gvec_3 * const fabs_fns[4] = {
903 NULL, gen_helper_sve_fabs_h,
904 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
905};
906TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
afac6d04 907
817bd5c9
RH
908static gen_helper_gvec_3 * const fneg_fns[4] = {
909 NULL, gen_helper_sve_fneg_h,
910 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
911};
912TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
afac6d04 913
817bd5c9
RH
914static gen_helper_gvec_3 * const sxtb_fns[4] = {
915 NULL, gen_helper_sve_sxtb_h,
916 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
917};
918TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)
afac6d04 919
817bd5c9
RH
920static gen_helper_gvec_3 * const uxtb_fns[4] = {
921 NULL, gen_helper_sve_uxtb_h,
922 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
923};
924TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)
afac6d04 925
817bd5c9
RH
926static gen_helper_gvec_3 * const sxth_fns[4] = {
927 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
928};
929TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)
afac6d04 930
817bd5c9
RH
931static gen_helper_gvec_3 * const uxth_fns[4] = {
932 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
933};
934TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)
afac6d04 935
817bd5c9
RH
936TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
937 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
938TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
939 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
afac6d04 940
047cec97
RH
941/*
942 *** SVE Integer Reduction Group
943 */
944
typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Invoke an out-of-line reduction helper: Vd(scalar) = reduce(Zn, Pg).
 * Returns false for a NULL fn (unallocated esz); true otherwise.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    /* The reduction result goes to the low 64 bits of Vd, zero-extended. */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
976
977#define DO_VPZ(NAME, name) \
9ac24f1f 978 static gen_helper_gvec_reduc * const name##_fns[4] = { \
047cec97
RH
979 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
980 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
981 }; \
9ac24f1f 982 TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])
047cec97
RH
983
984DO_VPZ(ORV, orv)
985DO_VPZ(ANDV, andv)
986DO_VPZ(EORV, eorv)
987
988DO_VPZ(UADDV, uaddv)
989DO_VPZ(SMAXV, smaxv)
990DO_VPZ(UMAXV, umaxv)
991DO_VPZ(SMINV, sminv)
992DO_VPZ(UMINV, uminv)
993
9ac24f1f
RH
994static gen_helper_gvec_reduc * const saddv_fns[4] = {
995 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
996 gen_helper_sve_saddv_s, NULL
997};
998TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])
047cec97
RH
999
1000#undef DO_VPZ
1001
ccd841c3
RH
1002/*
1003 *** SVE Shift by Immediate - Predicated Group
1004 */
1005
60245996
RH
/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    /* The invert flag is passed through as the helper's data operand. */
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}
1019
73c558a8
RH
/*
 * Expand a predicated shift-by-immediate, normalizing the out-of-range
 * immediates that the architecture nonetheless defines.  @asr is true
 * for arithmetic right shift; false for the zeroing shifts (LSR/LSL/ASRD).
 */
static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            /* Zero the active elements of Zd (rn == rd, invert == true). */
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}
1045
5cccd1f1
RH
/* Helper tables and expanders for the predicated shift-by-immediate insns. */
static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

/* ASRD: arithmetic shift right for divide (rounding toward zero). */
static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)
ccd841c3 1069
4df37e41
RH
/*
 * SVE2 predicated saturating/rounding shifts by immediate.  These go
 * straight to gen_gvec_ool_arg_zpzi; the esz < 0 guard rejects the
 * invalid tsz encoding by passing a NULL helper.
 */
static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
a5421b54 1104
fe7f8dfb
RH
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/*
 * Predicated shift of Zn by a wide (64-bit) element from Zm.
 * There is no _d helper: the NULL entry (and the esz < 0 guard)
 * rejects those encodings.
 */
#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {               \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s, NULL                              \
    };                                                                    \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,              \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1122
d9d78dcc
RH
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

/*
 * Expand an unpredicated shift by immediate with an inline gvec
 * expander (@gvec_fn), normalizing the shift-by-element-size case.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid. For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)
d9d78dcc 1157
/*
 * Unpredicated shift of Zn by a wide (64-bit) element from Zm.
 * No _d form exists; the NULL table entry rejects esz == 3.
 */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {                \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                      \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1171
96a36e4a
RH
/*
 *** SVE Integer Multiply-Add Group
 */

/*
 * Expand a predicated 4-operand multiply-add out of line:
 * Zd = op(Za, Zn, Zm) for active elements of Pg.
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
96a36e4a 1202
9a56c9c3
RH
/*
 *** SVE Index Generation Group
 */

/*
 * Expand INDEX: Zd[i] = start + i * incr.  The 64-bit helper takes the
 * operands directly; the narrower helpers take them truncated to 32 bits.
 */
static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* Truncate the 64-bit operands for the b/h/s helpers. */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    return true;
}

/* The four INDEX forms: immediate/register for each of start and incr. */
TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
9a56c9c3 1254
96f922cc
RH
1255/*
1256 *** SVE Stack Allocation Group
1257 */
1258
3a7be554 1259static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1260{
5de56742
AC
1261 if (sve_access_check(s)) {
1262 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1263 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1264 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1265 }
96f922cc
RH
1266 return true;
1267}
1268
3a7be554 1269static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1270{
5de56742
AC
1271 if (sve_access_check(s)) {
1272 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1273 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1274 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1275 }
96f922cc
RH
1276 return true;
1277}
1278
3a7be554 1279static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1280{
5de56742
AC
1281 if (sve_access_check(s)) {
1282 TCGv_i64 reg = cpu_reg(s, a->rd);
1283 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1284 }
96f922cc
RH
1285 return true;
1286}
1287
4b242d9c
RH
/*
 *** SVE Compute Vector Address Group
 */

/* Expand ADR; a->imm is the shift applied to Zm, passed via simd_data. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
4b242d9c 1301
0762cd42
RH
/*
 *** SVE Integer Misc - Unpredicated Group
 */

/* FEXPA has no byte form; the NULL entry rejects esz == 0. */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)

/* FTSSEL likewise has no byte form. */
static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
a1f233f2 1318
516e246a
RH
/*
 *** SVE Predicate Logical Operations Group
 */

/*
 * Expand a predicate logical operation, optionally setting the NZCV
 * flags (a->s) from a predicate test of the result under Pg.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        /* No flags: just the vector operation. */
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large. The computation
         * of the flags depends on the original contents of the guarding
         * predicate. If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1378
/* AND of predicates: pd = pn & pm & pg (inline i64 form). */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* AND of predicates, host-vector form. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* When not setting flags, exploit register aliasing to shrink the op. */
    if (!a->s) {
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                /* pd = pn & pn & pn == mov */
                return do_mov_p(s, a->rd, a->rn);
            }
            /* pd = pn & pn & pg == pn & pg */
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            /* The guard duplicates one operand: plain two-input AND. */
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}
1413
/* BIC of predicates: pd = (pn & ~pm) & pg (inline i64 form). */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* BIC of predicates, host-vector form. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Without flags and with pg == pn, this is just pn & ~pm. */
    if (!a->s && a->pg == a->rn) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}
1441
/* EOR of predicates: pd = (pn ^ pm) & pg (inline i64 form). */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* EOR of predicates, host-vector form. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1470
/* SEL of predicates: pd = pg ? pn : pm, bitwise; no flag-setting form. */
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s) {
        /* SEL has no flag-setting encoding. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}
1485
/* ORR of predicates: pd = (pn | pm) & pg (inline i64 form). */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* ORR of predicates, host-vector form. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* With all three sources equal and no flags, this is a move. */
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1513
/* ORN of predicates: pd = (pn | ~pm) & pg (inline i64 form). */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* ORN of predicates, host-vector form. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1537
/* NOR of predicates: pd = pg & ~(pn | pm) (inline i64 form). */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* NOR of predicates, host-vector form. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1561
/* NAND of predicates: pd = pg & ~(pn & pm) (inline i64 form). */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* NAND of predicates, host-vector form. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1585
9e18d7a6
RH
/*
 *** SVE Predicate Misc Group
 */

/* PTEST: set NZCV from a test of Pn under governing predicate Pg. */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Single 64-bit word: test inline. */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1613
028e2a7b
RH
/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned nelem = fullsz >> esz;
    unsigned limit;

    if (pattern == 0x0) {
        /* POW2 */
        return pow2floor(nelem);
    } else if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8: the pattern value is the element count. */
        limit = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256 */
        limit = 16u << (pattern - 9);
    } else if (pattern == 0x1d) {
        /* MUL4 */
        return nelem - nelem % 4;
    } else if (pattern == 0x1e) {
        /* MUL3 */
        return nelem - nelem % 3;
    } else if (pattern == 0x1f) {
        /* ALL */
        return nelem;
    } else {
        /* #uimm5 */
        return 0;
    }
    /* Fixed VLn patterns yield 0 when the vector is too short. */
    return nelem >= limit ? limit : 0;
}
1651
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Partial final word: keep only the low setsz%64 bits. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* Uniform fill: try a single gvec dup over the set portion. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Store full words, then the partial word, then zero the rest. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1731
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
028e2a7b 1739
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}
1751
ff502658
RH
/* RDFFR/WRFFR without predication are simple predicate moves to/from FFR. */
TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
028e2a7b
RH
1754
/*
 * Expand PFIRST/PNEXT.  The helper updates Pd in place from Pg and
 * returns the new predicate-test flags, which are copied into NZCV.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    /* Pack the predicate size and element size into the descriptor. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    /* Copy the helper's flag result into NZCV. */
    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
1783
d95040e3
RH
TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
028e2a7b 1786
24e82e68
RH
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisions we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtraction can only underflow: clamp from below. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Addition can only overflow: clamp from above. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1815
/* Similarly with 64-bit values.  Here overflow must be detected
 * explicitly, since there is no wider type to compute in.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: saturate to 0 when reg < val. */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: saturate to UINT64_MAX when the sum wraps. */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1861
/* Similarly with a vector and a scalar operand.  The saturating
 * add/subtract is performed out of line; for the narrower element
 * sizes the (positive) scalar is truncated, and subtraction is
 * implemented as addition of the negation.
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        /* 64-bit unsigned decrement has a dedicated subtract helper;
         * the other cases fold into saturating addition as above.
         */
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
1945
3a7be554 1946static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
1947{
1948 if (sve_access_check(s)) {
1949 unsigned fullsz = vec_full_reg_size(s);
1950 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1951 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1952 }
1953 return true;
1954}
1955
3a7be554 1956static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1957{
1958 if (sve_access_check(s)) {
1959 unsigned fullsz = vec_full_reg_size(s);
1960 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1961 int inc = numelem * a->imm * (a->d ? -1 : 1);
1962 TCGv_i64 reg = cpu_reg(s, a->rd);
1963
1964 tcg_gen_addi_i64(reg, reg, inc);
1965 }
1966 return true;
1967}
1968
/* SQINC/SQDEC/UQINC/UQDEC (32-bit scalar form). */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        /* Zero increment: still (sign- or zero-) extend the register. */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
1992
/* SQINC/SQDEC/UQINC/UQDEC (64-bit scalar form). */
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Zero increment is a no-op for the 64-bit form. */
    if (inc != 0) {
        do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
2009
/* INC/DEC (vector form); byte elements are not encodable. */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        /* Zero increment: plain vector move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
2032
3a7be554 2033static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2034{
2035 if (a->esz == 0) {
2036 return false;
2037 }
2038
2039 unsigned fullsz = vec_full_reg_size(s);
2040 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2041 int inc = numelem * a->imm;
2042
2043 if (inc != 0) {
2044 if (sve_access_check(s)) {
d681f125
RH
2045 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
2046 tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2047 }
2048 } else {
2049 do_mov_z(s, a->rd, a->rn);
2050 }
2051 return true;
2052}
2053
e1fa1164
RH
2054/*
2055 *** SVE Bitwise Immediate Group
2056 */
2057
2058static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2059{
2060 uint64_t imm;
2061 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2062 extract32(a->dbm, 0, 6),
2063 extract32(a->dbm, 6, 6))) {
2064 return false;
2065 }
faf915e2 2066 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
e1fa1164
RH
2067}
2068
15a314da
RH
2069TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
2070TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
2071TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
e1fa1164 2072
3a7be554 2073static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2074{
2075 uint64_t imm;
2076 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2077 extract32(a->dbm, 0, 6),
2078 extract32(a->dbm, 6, 6))) {
2079 return false;
2080 }
2081 if (sve_access_check(s)) {
2082 do_dupi_z(s, a->rd, imm);
2083 }
2084 return true;
2085}
2086
f25a2361
RH
2087/*
2088 *** SVE Integer Wide Immediate - Predicated Group
2089 */
2090
2091/* Implement all merging copies. This is used for CPY (immediate),
2092 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2093 */
2094static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2095 TCGv_i64 val)
2096{
2097 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2098 static gen_cpy * const fns[4] = {
2099 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2100 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2101 };
2102 unsigned vsz = vec_full_reg_size(s);
c6a59b55 2103 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
f25a2361
RH
2104 TCGv_ptr t_zd = tcg_temp_new_ptr();
2105 TCGv_ptr t_zn = tcg_temp_new_ptr();
2106 TCGv_ptr t_pg = tcg_temp_new_ptr();
2107
2108 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2109 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2110 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2111
2112 fns[esz](t_zd, t_zn, t_pg, val, desc);
2113
2114 tcg_temp_free_ptr(t_zd);
2115 tcg_temp_free_ptr(t_zn);
2116 tcg_temp_free_ptr(t_pg);
f25a2361
RH
2117}
2118
3a7be554 2119static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361
RH
2120{
2121 if (a->esz == 0) {
2122 return false;
2123 }
2124 if (sve_access_check(s)) {
2125 /* Decode the VFP immediate. */
2126 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
e152b48b 2127 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
f25a2361
RH
2128 }
2129 return true;
2130}
2131
3a7be554 2132static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 2133{
f25a2361 2134 if (sve_access_check(s)) {
e152b48b 2135 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
f25a2361
RH
2136 }
2137 return true;
2138}
2139
3a7be554 2140static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
f25a2361
RH
2141{
2142 static gen_helper_gvec_2i * const fns[4] = {
2143 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2144 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2145 };
2146
f25a2361
RH
2147 if (sve_access_check(s)) {
2148 unsigned vsz = vec_full_reg_size(s);
f25a2361
RH
2149 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2150 pred_full_reg_offset(s, a->pg),
e152b48b
RH
2151 tcg_constant_i64(a->imm),
2152 vsz, vsz, 0, fns[a->esz]);
f25a2361
RH
2153 }
2154 return true;
2155}
2156
b94f8f60
RH
2157/*
2158 *** SVE Permute Extract Group
2159 */
2160
75114792 2161static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
b94f8f60
RH
2162{
2163 if (!sve_access_check(s)) {
2164 return true;
2165 }
2166
2167 unsigned vsz = vec_full_reg_size(s);
75114792 2168 unsigned n_ofs = imm >= vsz ? 0 : imm;
b94f8f60 2169 unsigned n_siz = vsz - n_ofs;
75114792
SL
2170 unsigned d = vec_full_reg_offset(s, rd);
2171 unsigned n = vec_full_reg_offset(s, rn);
2172 unsigned m = vec_full_reg_offset(s, rm);
b94f8f60
RH
2173
2174 /* Use host vector move insns if we have appropriate sizes
2175 * and no unfortunate overlap.
2176 */
2177 if (m != d
2178 && n_ofs == size_for_gvec(n_ofs)
2179 && n_siz == size_for_gvec(n_siz)
2180 && (d != n || n_siz <= n_ofs)) {
2181 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2182 if (n_ofs != 0) {
2183 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2184 }
2185 } else {
2186 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2187 }
2188 return true;
2189}
2190
c799c115
RH
2191TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
2192TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
75114792 2193
30562ab7
RH
2194/*
2195 *** SVE Permute - Unpredicated Group
2196 */
2197
3a7be554 2198static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2199{
2200 if (sve_access_check(s)) {
2201 unsigned vsz = vec_full_reg_size(s);
2202 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2203 vsz, vsz, cpu_reg_sp(s, a->rn));
2204 }
2205 return true;
2206}
2207
3a7be554 2208static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
30562ab7
RH
2209{
2210 if ((a->imm & 0x1f) == 0) {
2211 return false;
2212 }
2213 if (sve_access_check(s)) {
2214 unsigned vsz = vec_full_reg_size(s);
2215 unsigned dofs = vec_full_reg_offset(s, a->rd);
2216 unsigned esz, index;
2217
2218 esz = ctz32(a->imm);
2219 index = a->imm >> (esz + 1);
2220
2221 if ((index << esz) < vsz) {
2222 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2223 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2224 } else {
7e17d50e
RH
2225 /*
2226 * While dup_mem handles 128-bit elements, dup_imm does not.
2227 * Thankfully element size doesn't matter for splatting zero.
2228 */
2229 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
30562ab7
RH
2230 }
2231 }
2232 return true;
2233}
2234
2235static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2236{
2237 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2238 static gen_insr * const fns[4] = {
2239 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2240 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2241 };
2242 unsigned vsz = vec_full_reg_size(s);
c6a59b55 2243 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
30562ab7
RH
2244 TCGv_ptr t_zd = tcg_temp_new_ptr();
2245 TCGv_ptr t_zn = tcg_temp_new_ptr();
2246
2247 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2248 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2249
2250 fns[a->esz](t_zd, t_zn, val, desc);
2251
2252 tcg_temp_free_ptr(t_zd);
2253 tcg_temp_free_ptr(t_zn);
30562ab7
RH
2254}
2255
3a7be554 2256static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2257{
2258 if (sve_access_check(s)) {
2259 TCGv_i64 t = tcg_temp_new_i64();
2260 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2261 do_insr_i64(s, a, t);
2262 tcg_temp_free_i64(t);
2263 }
2264 return true;
2265}
2266
3a7be554 2267static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2268{
2269 if (sve_access_check(s)) {
2270 do_insr_i64(s, a, cpu_reg(s, a->rm));
2271 }
2272 return true;
2273}
2274
0ea3cdbf
RH
2275static gen_helper_gvec_2 * const rev_fns[4] = {
2276 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2277 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2278};
2279TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
30562ab7 2280
32e2ad65
RH
2281static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
2282 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2283 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2284};
2285TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
30562ab7 2286
5f425b92
RH
2287static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
2288 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2289 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2290};
2291TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
2292 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
80a712a2 2293
32e2ad65
RH
2294static gen_helper_gvec_3 * const tbx_fns[4] = {
2295 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2296 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2297};
2298TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
80a712a2 2299
3a7be554 2300static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2301{
2302 static gen_helper_gvec_2 * const fns[4][2] = {
2303 { NULL, NULL },
2304 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2305 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2306 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2307 };
2308
2309 if (a->esz == 0) {
2310 return false;
2311 }
2312 if (sve_access_check(s)) {
2313 unsigned vsz = vec_full_reg_size(s);
2314 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2315 vec_full_reg_offset(s, a->rn)
2316 + (a->h ? vsz / 2 : 0),
2317 vsz, vsz, 0, fns[a->esz][a->u]);
2318 }
2319 return true;
2320}
2321
d731d8cb
RH
2322/*
2323 *** SVE Permute - Predicates Group
2324 */
2325
2326static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2327 gen_helper_gvec_3 *fn)
2328{
2329 if (!sve_access_check(s)) {
2330 return true;
2331 }
2332
2333 unsigned vsz = pred_full_reg_size(s);
2334
d731d8cb
RH
2335 TCGv_ptr t_d = tcg_temp_new_ptr();
2336 TCGv_ptr t_n = tcg_temp_new_ptr();
2337 TCGv_ptr t_m = tcg_temp_new_ptr();
f9b0fcce 2338 uint32_t desc = 0;
d731d8cb 2339
f9b0fcce
RH
2340 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2341 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2342 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2343
2344 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2345 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2346 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
d731d8cb 2347
c6a59b55 2348 fn(t_d, t_n, t_m, tcg_constant_i32(desc));
d731d8cb
RH
2349
2350 tcg_temp_free_ptr(t_d);
2351 tcg_temp_free_ptr(t_n);
2352 tcg_temp_free_ptr(t_m);
d731d8cb
RH
2353 return true;
2354}
2355
2356static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2357 gen_helper_gvec_2 *fn)
2358{
2359 if (!sve_access_check(s)) {
2360 return true;
2361 }
2362
2363 unsigned vsz = pred_full_reg_size(s);
2364 TCGv_ptr t_d = tcg_temp_new_ptr();
2365 TCGv_ptr t_n = tcg_temp_new_ptr();
70acaafe 2366 uint32_t desc = 0;
d731d8cb
RH
2367
2368 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2369 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2370
70acaafe
RH
2371 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2372 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2373 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb 2374
c6a59b55 2375 fn(t_d, t_n, tcg_constant_i32(desc));
d731d8cb 2376
d731d8cb
RH
2377 tcg_temp_free_ptr(t_d);
2378 tcg_temp_free_ptr(t_n);
2379 return true;
2380}
2381
bdb349f5
RH
2382TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
2383TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
2384TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
2385TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
2386TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
2387TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
d731d8cb 2388
1d0fce4b
RH
2389TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
2390TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
2391TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
d731d8cb 2392
234b48e9
RH
2393/*
2394 *** SVE Permute - Interleaving Group
2395 */
2396
a95b9618
RH
2397static gen_helper_gvec_3 * const zip_fns[4] = {
2398 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2399 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2400};
2401TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2402 zip_fns[a->esz], a, 0)
2403TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2404 zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
2405
2406TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2407 gen_helper_sve2_zip_q, a, 0)
2408TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2409 gen_helper_sve2_zip_q, a,
2410 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
74b64b25 2411
234b48e9
RH
2412static gen_helper_gvec_3 * const uzp_fns[4] = {
2413 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2414 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2415};
2416
32e2ad65
RH
2417TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2418 uzp_fns[a->esz], a, 0)
2419TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2420 uzp_fns[a->esz], a, 1 << a->esz)
234b48e9 2421
32e2ad65
RH
2422TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2423 gen_helper_sve2_uzp_q, a, 0)
2424TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2425 gen_helper_sve2_uzp_q, a, 16)
74b64b25 2426
234b48e9
RH
2427static gen_helper_gvec_3 * const trn_fns[4] = {
2428 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2429 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2430};
2431
32e2ad65
RH
2432TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2433 trn_fns[a->esz], a, 0)
2434TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2435 trn_fns[a->esz], a, 1 << a->esz)
234b48e9 2436
32e2ad65
RH
2437TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2438 gen_helper_sve2_trn_q, a, 0)
2439TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2440 gen_helper_sve2_trn_q, a, 16)
74b64b25 2441
3ca879ae
RH
2442/*
2443 *** SVE Permute Vector - Predicated Group
2444 */
2445
817bd5c9
RH
2446static gen_helper_gvec_3 * const compact_fns[4] = {
2447 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2448};
2449TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
3ca879ae 2450
ef23cb72
RH
2451/* Call the helper that computes the ARM LastActiveElement pseudocode
2452 * function, scaled by the element size. This includes the not found
2453 * indication; e.g. not found for esz=3 is -8.
2454 */
2455static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2456{
2457 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2458 * round up, as we do elsewhere, because we need the exact size.
2459 */
2460 TCGv_ptr t_p = tcg_temp_new_ptr();
2acbfbe4 2461 unsigned desc = 0;
ef23cb72 2462
2acbfbe4
RH
2463 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2464 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
ef23cb72
RH
2465
2466 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
ef23cb72 2467
c6a59b55 2468 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
ef23cb72 2469
ef23cb72
RH
2470 tcg_temp_free_ptr(t_p);
2471}
2472
2473/* Increment LAST to the offset of the next element in the vector,
2474 * wrapping around to 0.
2475 */
2476static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2477{
2478 unsigned vsz = vec_full_reg_size(s);
2479
2480 tcg_gen_addi_i32(last, last, 1 << esz);
2481 if (is_power_of_2(vsz)) {
2482 tcg_gen_andi_i32(last, last, vsz - 1);
2483 } else {
4b308bd5
RH
2484 TCGv_i32 max = tcg_constant_i32(vsz);
2485 TCGv_i32 zero = tcg_constant_i32(0);
ef23cb72 2486 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
ef23cb72
RH
2487 }
2488}
2489
2490/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2491static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2492{
2493 unsigned vsz = vec_full_reg_size(s);
2494
2495 if (is_power_of_2(vsz)) {
2496 tcg_gen_andi_i32(last, last, vsz - 1);
2497 } else {
4b308bd5
RH
2498 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
2499 TCGv_i32 zero = tcg_constant_i32(0);
ef23cb72 2500 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
ef23cb72
RH
2501 }
2502}
2503
2504/* Load an unsigned element of ESZ from BASE+OFS. */
2505static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2506{
2507 TCGv_i64 r = tcg_temp_new_i64();
2508
2509 switch (esz) {
2510 case 0:
2511 tcg_gen_ld8u_i64(r, base, ofs);
2512 break;
2513 case 1:
2514 tcg_gen_ld16u_i64(r, base, ofs);
2515 break;
2516 case 2:
2517 tcg_gen_ld32u_i64(r, base, ofs);
2518 break;
2519 case 3:
2520 tcg_gen_ld_i64(r, base, ofs);
2521 break;
2522 default:
2523 g_assert_not_reached();
2524 }
2525 return r;
2526}
2527
2528/* Load an unsigned element of ESZ from RM[LAST]. */
2529static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2530 int rm, int esz)
2531{
2532 TCGv_ptr p = tcg_temp_new_ptr();
2533 TCGv_i64 r;
2534
2535 /* Convert offset into vector into offset into ENV.
2536 * The final adjustment for the vector register base
2537 * is added via constant offset to the load.
2538 */
e03b5686 2539#if HOST_BIG_ENDIAN
ef23cb72
RH
2540 /* Adjust for element ordering. See vec_reg_offset. */
2541 if (esz < 3) {
2542 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2543 }
2544#endif
2545 tcg_gen_ext_i32_ptr(p, last);
2546 tcg_gen_add_ptr(p, p, cpu_env);
2547
2548 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2549 tcg_temp_free_ptr(p);
2550
2551 return r;
2552}
2553
2554/* Compute CLAST for a Zreg. */
2555static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2556{
2557 TCGv_i32 last;
2558 TCGLabel *over;
2559 TCGv_i64 ele;
2560 unsigned vsz, esz = a->esz;
2561
2562 if (!sve_access_check(s)) {
2563 return true;
2564 }
2565
2566 last = tcg_temp_local_new_i32();
2567 over = gen_new_label();
2568
2569 find_last_active(s, last, esz, a->pg);
2570
2571 /* There is of course no movcond for a 2048-bit vector,
2572 * so we must branch over the actual store.
2573 */
2574 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2575
2576 if (!before) {
2577 incr_last_active(s, last, esz);
2578 }
2579
2580 ele = load_last_active(s, last, a->rm, esz);
2581 tcg_temp_free_i32(last);
2582
2583 vsz = vec_full_reg_size(s);
2584 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2585 tcg_temp_free_i64(ele);
2586
2587 /* If this insn used MOVPRFX, we may need a second move. */
2588 if (a->rd != a->rn) {
2589 TCGLabel *done = gen_new_label();
2590 tcg_gen_br(done);
2591
2592 gen_set_label(over);
2593 do_mov_z(s, a->rd, a->rn);
2594
2595 gen_set_label(done);
2596 } else {
2597 gen_set_label(over);
2598 }
2599 return true;
2600}
2601
db7fa5d8
RH
2602TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
2603TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
ef23cb72
RH
2604
2605/* Compute CLAST for a scalar. */
2606static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2607 bool before, TCGv_i64 reg_val)
2608{
2609 TCGv_i32 last = tcg_temp_new_i32();
053552d3 2610 TCGv_i64 ele, cmp;
ef23cb72
RH
2611
2612 find_last_active(s, last, esz, pg);
2613
2614 /* Extend the original value of last prior to incrementing. */
2615 cmp = tcg_temp_new_i64();
2616 tcg_gen_ext_i32_i64(cmp, last);
2617
2618 if (!before) {
2619 incr_last_active(s, last, esz);
2620 }
2621
2622 /* The conceit here is that while last < 0 indicates not found, after
2623 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2624 * from which we can load garbage. We then discard the garbage with
2625 * a conditional move.
2626 */
2627 ele = load_last_active(s, last, rm, esz);
2628 tcg_temp_free_i32(last);
2629
053552d3
RH
2630 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2631 ele, reg_val);
ef23cb72 2632
ef23cb72
RH
2633 tcg_temp_free_i64(cmp);
2634 tcg_temp_free_i64(ele);
2635}
2636
2637/* Compute CLAST for a Vreg. */
2638static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2639{
2640 if (sve_access_check(s)) {
2641 int esz = a->esz;
2642 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2643 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2644
2645 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2646 write_fp_dreg(s, a->rd, reg);
2647 tcg_temp_free_i64(reg);
2648 }
2649 return true;
2650}
2651
ac4fb247
RH
2652TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
2653TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
ef23cb72
RH
2654
2655/* Compute CLAST for a Xreg. */
2656static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2657{
2658 TCGv_i64 reg;
2659
2660 if (!sve_access_check(s)) {
2661 return true;
2662 }
2663
2664 reg = cpu_reg(s, a->rd);
2665 switch (a->esz) {
2666 case 0:
2667 tcg_gen_ext8u_i64(reg, reg);
2668 break;
2669 case 1:
2670 tcg_gen_ext16u_i64(reg, reg);
2671 break;
2672 case 2:
2673 tcg_gen_ext32u_i64(reg, reg);
2674 break;
2675 case 3:
2676 break;
2677 default:
2678 g_assert_not_reached();
2679 }
2680
2681 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2682 return true;
2683}
2684
c673404a
RH
2685TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2686TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
ef23cb72
RH
2687
2688/* Compute LAST for a scalar. */
2689static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2690 int pg, int rm, bool before)
2691{
2692 TCGv_i32 last = tcg_temp_new_i32();
2693 TCGv_i64 ret;
2694
2695 find_last_active(s, last, esz, pg);
2696 if (before) {
2697 wrap_last_active(s, last, esz);
2698 } else {
2699 incr_last_active(s, last, esz);
2700 }
2701
2702 ret = load_last_active(s, last, rm, esz);
2703 tcg_temp_free_i32(last);
2704 return ret;
2705}
2706
2707/* Compute LAST for a Vreg. */
2708static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2709{
2710 if (sve_access_check(s)) {
2711 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2712 write_fp_dreg(s, a->rd, val);
2713 tcg_temp_free_i64(val);
2714 }
2715 return true;
2716}
2717
75de9fd4
RH
2718TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
2719TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
ef23cb72
RH
2720
2721/* Compute LAST for a Xreg. */
2722static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2723{
2724 if (sve_access_check(s)) {
2725 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2726 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2727 tcg_temp_free_i64(val);
2728 }
2729 return true;
2730}
2731
884c5a80
RH
2732TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
2733TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
ef23cb72 2734
3a7be554 2735static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2736{
2737 if (sve_access_check(s)) {
2738 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2739 }
2740 return true;
2741}
2742
3a7be554 2743static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2744{
2745 if (sve_access_check(s)) {
2746 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2747 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2748 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2749 tcg_temp_free_i64(t);
2750 }
2751 return true;
2752}
2753
817bd5c9
RH
2754static gen_helper_gvec_3 * const revb_fns[4] = {
2755 NULL, gen_helper_sve_revb_h,
2756 gen_helper_sve_revb_s, gen_helper_sve_revb_d,
2757};
2758TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
dae8fb90 2759
817bd5c9
RH
2760static gen_helper_gvec_3 * const revh_fns[4] = {
2761 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
2762};
2763TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
dae8fb90 2764
817bd5c9
RH
2765TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
2766 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
dae8fb90 2767
897ebd70
RH
2768TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
2769 gen_helper_sve_splice, a, a->esz)
b48ff240 2770
897ebd70
RH
2771TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
2772 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
75114792 2773
757f9cff
RH
2774/*
2775 *** SVE Integer Compare - Vectors Group
2776 */
2777
2778static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2779 gen_helper_gvec_flags_4 *gen_fn)
2780{
2781 TCGv_ptr pd, zn, zm, pg;
2782 unsigned vsz;
2783 TCGv_i32 t;
2784
2785 if (gen_fn == NULL) {
2786 return false;
2787 }
2788 if (!sve_access_check(s)) {
2789 return true;
2790 }
2791
2792 vsz = vec_full_reg_size(s);
392acacc 2793 t = tcg_temp_new_i32();
757f9cff
RH
2794 pd = tcg_temp_new_ptr();
2795 zn = tcg_temp_new_ptr();
2796 zm = tcg_temp_new_ptr();
2797 pg = tcg_temp_new_ptr();
2798
2799 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2800 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2801 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2802 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2803
392acacc 2804 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
757f9cff
RH
2805
2806 tcg_temp_free_ptr(pd);
2807 tcg_temp_free_ptr(zn);
2808 tcg_temp_free_ptr(zm);
2809 tcg_temp_free_ptr(pg);
2810
2811 do_pred_flags(t);
2812
2813 tcg_temp_free_i32(t);
2814 return true;
2815}
2816
2817#define DO_PPZZ(NAME, name) \
671bdb2e
RH
2818 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \
2819 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2820 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2821 }; \
2822 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \
2823 a, name##_ppzz_fns[a->esz])
757f9cff
RH
2824
2825DO_PPZZ(CMPEQ, cmpeq)
2826DO_PPZZ(CMPNE, cmpne)
2827DO_PPZZ(CMPGT, cmpgt)
2828DO_PPZZ(CMPGE, cmpge)
2829DO_PPZZ(CMPHI, cmphi)
2830DO_PPZZ(CMPHS, cmphs)
2831
2832#undef DO_PPZZ
2833
2834#define DO_PPZW(NAME, name) \
671bdb2e
RH
2835 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \
2836 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2837 gen_helper_sve_##name##_ppzw_s, NULL \
2838 }; \
2839 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \
2840 a, name##_ppzw_fns[a->esz])
757f9cff
RH
2841
2842DO_PPZW(CMPEQ, cmpeq)
2843DO_PPZW(CMPNE, cmpne)
2844DO_PPZW(CMPGT, cmpgt)
2845DO_PPZW(CMPGE, cmpge)
2846DO_PPZW(CMPHI, cmphi)
2847DO_PPZW(CMPHS, cmphs)
2848DO_PPZW(CMPLT, cmplt)
2849DO_PPZW(CMPLE, cmple)
2850DO_PPZW(CMPLO, cmplo)
2851DO_PPZW(CMPLS, cmpls)
2852
2853#undef DO_PPZW
2854
38cadeba
RH
2855/*
2856 *** SVE Integer Compare - Immediate Groups
2857 */
2858
2859static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2860 gen_helper_gvec_flags_3 *gen_fn)
2861{
2862 TCGv_ptr pd, zn, pg;
2863 unsigned vsz;
2864 TCGv_i32 t;
2865
2866 if (gen_fn == NULL) {
2867 return false;
2868 }
2869 if (!sve_access_check(s)) {
2870 return true;
2871 }
2872
2873 vsz = vec_full_reg_size(s);
392acacc 2874 t = tcg_temp_new_i32();
38cadeba
RH
2875 pd = tcg_temp_new_ptr();
2876 zn = tcg_temp_new_ptr();
2877 pg = tcg_temp_new_ptr();
2878
2879 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2880 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2881 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2882
392acacc 2883 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
38cadeba
RH
2884
2885 tcg_temp_free_ptr(pd);
2886 tcg_temp_free_ptr(zn);
2887 tcg_temp_free_ptr(pg);
2888
2889 do_pred_flags(t);
2890
2891 tcg_temp_free_i32(t);
2892 return true;
2893}
2894
2895#define DO_PPZI(NAME, name) \
9c545be6 2896 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \
38cadeba
RH
2897 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2898 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2899 }; \
9c545be6
RH
2900 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \
2901 name##_ppzi_fns[a->esz])
38cadeba
RH
2902
2903DO_PPZI(CMPEQ, cmpeq)
2904DO_PPZI(CMPNE, cmpne)
2905DO_PPZI(CMPGT, cmpgt)
2906DO_PPZI(CMPGE, cmpge)
2907DO_PPZI(CMPHI, cmphi)
2908DO_PPZI(CMPHS, cmphs)
2909DO_PPZI(CMPLT, cmplt)
2910DO_PPZI(CMPLE, cmple)
2911DO_PPZI(CMPLO, cmplo)
2912DO_PPZI(CMPLS, cmpls)
2913
2914#undef DO_PPZI
2915
35da316f
RH
2916/*
2917 *** SVE Partition Break Group
2918 */
2919
2920static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2921 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2922{
2923 if (!sve_access_check(s)) {
2924 return true;
2925 }
2926
2927 unsigned vsz = pred_full_reg_size(s);
2928
2929 /* Predicate sizes may be smaller and cannot use simd_desc. */
2930 TCGv_ptr d = tcg_temp_new_ptr();
2931 TCGv_ptr n = tcg_temp_new_ptr();
2932 TCGv_ptr m = tcg_temp_new_ptr();
2933 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 2934 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
2935
2936 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2937 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2938 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2939 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2940
2941 if (a->s) {
93418f1c
RH
2942 TCGv_i32 t = tcg_temp_new_i32();
2943 fn_s(t, d, n, m, g, desc);
35da316f 2944 do_pred_flags(t);
93418f1c 2945 tcg_temp_free_i32(t);
35da316f 2946 } else {
93418f1c 2947 fn(d, n, m, g, desc);
35da316f
RH
2948 }
2949 tcg_temp_free_ptr(d);
2950 tcg_temp_free_ptr(n);
2951 tcg_temp_free_ptr(m);
2952 tcg_temp_free_ptr(g);
35da316f
RH
2953 return true;
2954}
2955
2956static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2957 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2958{
2959 if (!sve_access_check(s)) {
2960 return true;
2961 }
2962
2963 unsigned vsz = pred_full_reg_size(s);
2964
2965 /* Predicate sizes may be smaller and cannot use simd_desc. */
2966 TCGv_ptr d = tcg_temp_new_ptr();
2967 TCGv_ptr n = tcg_temp_new_ptr();
2968 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 2969 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
2970
2971 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2972 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2973 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2974
2975 if (a->s) {
93418f1c
RH
2976 TCGv_i32 t = tcg_temp_new_i32();
2977 fn_s(t, d, n, g, desc);
35da316f 2978 do_pred_flags(t);
93418f1c 2979 tcg_temp_free_i32(t);
35da316f 2980 } else {
93418f1c 2981 fn(d, n, g, desc);
35da316f
RH
2982 }
2983 tcg_temp_free_ptr(d);
2984 tcg_temp_free_ptr(n);
2985 tcg_temp_free_ptr(g);
35da316f
RH
2986 return true;
2987}
2988
2224d24d
RH
2989TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
2990 gen_helper_sve_brkpa, gen_helper_sve_brkpas)
2991TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
2992 gen_helper_sve_brkpb, gen_helper_sve_brkpbs)
2993
2994TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
2995 gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
2996TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
2997 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)
2998
2999TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
3000 gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
3001TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
3002 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)
3003
3004TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
3005 gen_helper_sve_brkn, gen_helper_sve_brkns)
35da316f 3006
/*
 *** SVE Predicate Count Group
 */

/*
 * Compute in VAL the number of active elements of size ESZ in
 * predicate PN, as governed by predicate PG.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        /* The whole predicate fits in one i64: count bits inline. */
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            /* Mask PN by the governing predicate. */
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        /* Larger predicates: defer to the out-of-line helper. */
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}
3049
3a7be554 3050static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
3051{
3052 if (sve_access_check(s)) {
3053 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3054 }
3055 return true;
3056}
3057
3a7be554 3058static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3059{
3060 if (sve_access_check(s)) {
3061 TCGv_i64 reg = cpu_reg(s, a->rd);
3062 TCGv_i64 val = tcg_temp_new_i64();
3063
3064 do_cntp(s, val, a->esz, a->pg, a->pg);
3065 if (a->d) {
3066 tcg_gen_sub_i64(reg, reg, val);
3067 } else {
3068 tcg_gen_add_i64(reg, reg, val);
3069 }
3070 tcg_temp_free_i64(val);
3071 }
3072 return true;
3073}
3074
3a7be554 3075static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3076{
3077 if (a->esz == 0) {
3078 return false;
3079 }
3080 if (sve_access_check(s)) {
3081 unsigned vsz = vec_full_reg_size(s);
3082 TCGv_i64 val = tcg_temp_new_i64();
3083 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3084
3085 do_cntp(s, val, a->esz, a->pg, a->pg);
3086 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3087 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3088 }
3089 return true;
3090}
3091
3a7be554 3092static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3093{
3094 if (sve_access_check(s)) {
3095 TCGv_i64 reg = cpu_reg(s, a->rd);
3096 TCGv_i64 val = tcg_temp_new_i64();
3097
3098 do_cntp(s, val, a->esz, a->pg, a->pg);
3099 do_sat_addsub_32(reg, val, a->u, a->d);
3100 }
3101 return true;
3102}
3103
3a7be554 3104static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3105{
3106 if (sve_access_check(s)) {
3107 TCGv_i64 reg = cpu_reg(s, a->rd);
3108 TCGv_i64 val = tcg_temp_new_i64();
3109
3110 do_cntp(s, val, a->esz, a->pg, a->pg);
3111 do_sat_addsub_64(reg, val, a->u, a->d);
3112 }
3113 return true;
3114}
3115
3a7be554 3116static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3117{
3118 if (a->esz == 0) {
3119 return false;
3120 }
3121 if (sve_access_check(s)) {
3122 TCGv_i64 val = tcg_temp_new_i64();
3123 do_cntp(s, val, a->esz, a->pg, a->pg);
3124 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3125 }
3126 return true;
3127}
3128
/*
 *** SVE Integer Compare Scalars Group
 */

/*
 * CTERMEQ/CTERMNE: compare two scalars and set N and V for the
 * loop-termination test.  C is read but left unchanged.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* N = (rn cond rm), computed as a 0/1 value first. */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3157
/*
 * WHILE<cond>: compute the number of loop iterations for which the
 * scalar condition holds, bound it by the element count of the vector,
 * and have the helper construct the corresponding predicate and the
 * NZCV flags.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit operands: extend according to signedness. */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    /* tmax = number of elements in the vector at this element size. */
    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length. This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
3268
14f6dad1
RH
3269static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3270{
3271 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3272 TCGv_i32 t2;
14f6dad1
RH
3273 TCGv_ptr ptr;
3274 unsigned vsz = vec_full_reg_size(s);
3275 unsigned desc = 0;
3276
3277 if (!dc_isar_feature(aa64_sve2, s)) {
3278 return false;
3279 }
3280 if (!sve_access_check(s)) {
3281 return true;
3282 }
3283
3284 op0 = read_cpu_reg(s, a->rn, 1);
3285 op1 = read_cpu_reg(s, a->rm, 1);
3286
4481bbf2 3287 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3288 diff = tcg_temp_new_i64();
3289
3290 if (a->rw) {
3291 /* WHILERW */
3292 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3293 t1 = tcg_temp_new_i64();
3294 tcg_gen_sub_i64(diff, op0, op1);
3295 tcg_gen_sub_i64(t1, op1, op0);
3296 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3297 tcg_temp_free_i64(t1);
3298 /* Round down to a multiple of ESIZE. */
3299 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3300 /* If op1 == op0, diff == 0, and the condition is always true. */
3301 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3302 } else {
3303 /* WHILEWR */
3304 tcg_gen_sub_i64(diff, op1, op0);
3305 /* Round down to a multiple of ESIZE. */
3306 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3307 /* If op0 >= op1, diff <= 0, the condition is always true. */
3308 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3309 }
3310
3311 /* Bound to the maximum. */
3312 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3313
3314 /* Since we're bounded, pass as a 32-bit type. */
3315 t2 = tcg_temp_new_i32();
3316 tcg_gen_extrl_i64_i32(t2, diff);
3317 tcg_temp_free_i64(diff);
3318
3319 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3320 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3321
3322 ptr = tcg_temp_new_ptr();
3323 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3324
4481bbf2 3325 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3326 do_pred_flags(t2);
3327
3328 tcg_temp_free_ptr(ptr);
3329 tcg_temp_free_i32(t2);
14f6dad1
RH
3330 return true;
3331}
3332
ed491961
RH
3333/*
3334 *** SVE Integer Wide Immediate - Unpredicated Group
3335 */
3336
3a7be554 3337static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3338{
3339 if (a->esz == 0) {
3340 return false;
3341 }
3342 if (sve_access_check(s)) {
3343 unsigned vsz = vec_full_reg_size(s);
3344 int dofs = vec_full_reg_offset(s, a->rd);
3345 uint64_t imm;
3346
3347 /* Decode the VFP immediate. */
3348 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3349 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3350 }
3351 return true;
3352}
3353
3a7be554 3354static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3355{
ed491961
RH
3356 if (sve_access_check(s)) {
3357 unsigned vsz = vec_full_reg_size(s);
3358 int dofs = vec_full_reg_offset(s, a->rd);
8711e71f 3359 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3360 }
3361 return true;
3362}
3363
/* ADD (immediate): plain vector add of the wide immediate. */
TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)

/* SUB (immediate): implemented as ADD of the negated immediate. */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3371
/*
 * SUBR (immediate): reversed subtract, Zd = imm - Zn, expanded with a
 * scalar-first GVecGen2s per element size.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}
3411
/* MUL (immediate): plain vector multiply by the wide immediate. */
TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)

/*
 * Saturating add/subtract of an immediate:
 * U selects unsigned saturation, D selects subtract.
 */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    if (sve_access_check(s)) {
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                          tcg_constant_i64(a->imm), u, d);
    }
    return true;
}

TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
6e6a157d
RH
3427
3428static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3429{
3430 if (sve_access_check(s)) {
3431 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3432 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3433 vec_full_reg_offset(s, a->rn),
138a1f7b 3434 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
6e6a157d
RH
3435 }
3436 return true;
3437}
3438
/* Min/max with immediate, via an esz-indexed table of helpers. */
#define DO_ZZI(NAME, name) \
    static gen_helper_gvec_2i * const name##i_fns[4] = { \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
    }; \
    TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI

/* DOT product helpers, indexed by [unsigned][size]. */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
d730ecaa 3459
814d4c52
RH
/*
 * SVE Multiply - Indexed
 */

TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

/* Mixed-sign dot products require the I8MM extension. */
TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)
16fcfdc7 3477
/* Two-operand indexed multiplies; the helper data is the element index. */
#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX
3495
/*
 * Long indexed multiplies: TOP selects the top/bottom input half,
 * packed into bit 0 of the helper data alongside the index.
 */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
3516
/* Indexed multiply-accumulate (SVE2). */
#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
3537
/*
 * Long indexed multiply-accumulate: TOP selects top/bottom half,
 * packed into bit 0 of the helper data alongside the index.
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
3573
/*
 * Complex indexed multiply-accumulate: the rotation occupies the low
 * two bits of the helper data, the index the bits above.
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
3588
ca40a6e6
RH
/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */

/*
 * FMLA/FMLS (indexed).  SUB is packed into bit 0 of the helper data;
 * index 0 of fns[] is NULL because there is no 8-bit FP element size.
 */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[4] = {
        NULL,
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };
    return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sub,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
0a82d963 3608
ca40a6e6
RH
/*
 *** SVE Floating Point Multiply Indexed Group
 */

/* FMUL (indexed); fns[0] is NULL as there is no 8-bit FP element size. */
static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
    NULL, gen_helper_gvec_fmul_idx_h,
    gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
           fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
ca40a6e6 3620
23fbe79f
RH
/*
 *** SVE Floating Point Fast Reduction Group
 */

typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/*
 * Expand an FP horizontal reduction of Zn under Pg, writing the scalar
 * result to Vd.  The desc data field carries pow2ceil(vsz) — presumably
 * the helper's reduction bound; confirm against sve_helper.c.
 */
static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz, p2vsz;
    TCGv_i32 t_desc;
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    p2vsz = pow2ceil(vsz);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
3663
/* FP reductions; fns[0] is NULL as there is no 8-bit FP element size. */
#define DO_VPZ(NAME, name) \
    static gen_helper_fp_reduce * const name##_fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    }; \
    TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)

#undef DO_VPZ
3678
3887c038
RH
/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */

/* FRECPE/FRSQRTE estimates; fns[0] is NULL (no 8-bit FP elements). */
static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
    NULL, gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
};
TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)

static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
    NULL, gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
};
TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
3887c038 3694
/*
 *** SVE Floating Point Compare with Zero Group
 */

/* Expand a predicated FP compare-with-zero, writing a predicate result. */
static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}

/* fns[] is indexed by esz - 1: esz == 0 (8-bit FP) is rejected. */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
{ \
    static gen_helper_gvec_3_ptr * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_ppz_fp(s, a, fns[a->esz - 1]); \
    } \
    return true; \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3737
67fcd9ad
RH
/*
 *** SVE floating-point trig multiply-add coefficient
 */

/* FTMAD; the coefficient-select immediate rides in the helper data. */
static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
    NULL, gen_helper_sve_ftmad_h,
    gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
};
TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
           ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
67fcd9ad 3749
7f9ddf64
RH
/*
 *** SVE Floating Point Accumulating Reduction Group
 */

/*
 * FADDA: accumulate the active elements of Zm into the scalar taken
 * from element 0 of Zn, writing the result to Vd.  There is no 8-bit
 * FP element size, so esz == 0 is rejected and fns[] is esz - 1 based.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
3793
29b80469
RH
/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */

/* Unpredicated FP three-operand ops; fns[0] is NULL (no 8-bit FP). */
#define DO_FP3(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
        NULL, gen_helper_gvec_##name##_h, \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
    }; \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
3813
ec3b87c2
RH
/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */

/*
 * Expand a predicated FP three-operand op via an out-of-line helper,
 * with the FP status pointer selected by element size.
 */
static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                       gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

/* fns[0] is NULL: there is no 8-bit FP element size. */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_zpzz_fp(s, a, fns[a->esz]); \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
8092c6a3 3860
cc48affe
RH
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/*
 * Expand a predicated vector-by-scalar FP operation: Zd = Zn <op> scalar
 * under Pg.  IS_FP16 selects the FP16 status pointer.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}

/* As do_fp_scalar, with the scalar supplied as an immediate. */
static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
                      gen_helper_sve_fp2scalar *fn)
{
    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                 tcg_constant_i64(imm), fn);
}
3894
/*
 * FP arithmetic with one of two fixed immediates, chosen by a->imm;
 * const0/const1 name the softfloat constants per format.  No 8-bit FP,
 * so esz == 0 is rejected and the tables are esz - 1 based.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
{ \
    static gen_helper_sve_fp2scalar * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d \
    }; \
    static uint64_t const val[3][2] = { \
        { float16_##const0, float16_##const1 }, \
        { float32_##const0, float32_##const1 }, \
        { float64_##const0, float64_##const1 }, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
    } \
    return true; \
}

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
3927
abfdefd5
RH
/* Expand a predicated FP compare, writing a predicate result to Pd. */
static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
                      gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

/* fns[0] is NULL: there is no 8-bit FP element size. */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_fp_cmp(s, a, fns[a->esz]); \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
3966
/*
 * FCADD: predicated floating-point complex add, with a->rot selecting
 * the 90/270 degree rotation.  The fns table is indexed by esz - 1
 * since MO_8 is an invalid encoding.
 */
static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_sve_fcadd_h,
        gen_helper_sve_fcadd_s,
        gen_helper_sve_fcadd_d
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        /* The rotation selector is passed to the helper via simd_data. */
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3990
/*
 * Expand a predicated fused multiply-add (rd = ra +/- rn * rm variants),
 * taking three vector sources plus the governing predicate.
 * The esz == 0 encoding is invalid.
 */
static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
                    gen_helper_gvec_5_ptr *fn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4010
/*
 * Instantiate the four predicated FMLA-family entry points; the NULL
 * MO_8 slot is rejected by the esz check inside do_fmla.
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
{ \
    static gen_helper_gvec_5_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_fmla(s, a, fns[a->esz]); \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4027
/*
 * FCMLA (vectors): predicated complex multiply-accumulate, with the
 * rotation a->rot passed through simd_data to the helper.
 */
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    static gen_helper_gvec_5_ptr * const fns[4] = {
        NULL,
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4053
/*
 * FCMLA (indexed): complex multiply-accumulate with one operand
 * selected by index.  Only h/s element sizes are architected.
 * Index and rotation are packed together into simd_data
 * (index * 4 + rot), to be unpacked by the helper.
 */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_4_ptr * const fns[4] = {
        NULL,
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
        NULL,
    };

    /* The decode guarantees rd == ra for this insn form. */
    tcg_debug_assert(a->rd == a->ra);

    return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
                              a->index * 4 + a->rot,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}
4069
8092c6a3
RH
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */

/* FP->FP conversions use the double-precision fpstatus. */
TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)

/* FP->int conversions from half-precision use the FP16 fpstatus. */
TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)

TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
df4de1af 4121
/*
 * Round-to-integral helpers, indexed by element size.  MO_8 has no FP
 * format.  Shared by FRINTI and, below, by the explicit-mode FRINT<x>.
 */
static gen_helper_gvec_3_ptr * const frint_fns[] = {
    NULL,
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};
/* FRINTI: round to integral using the current FPCR rounding mode. */
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
cda3c753 4130
0360730c
RH
4131static gen_helper_gvec_3_ptr * const frintx_fns[] = {
4132 NULL,
4133 gen_helper_sve_frintx_h,
4134 gen_helper_sve_frintx_s,
4135 gen_helper_sve_frintx_d
4136};
4137TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
4138 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753 4139
/*
 * Expand a round-to-integral operation with an explicit rounding mode,
 * temporarily overriding the mode in the fpstatus around the operation.
 * Returns false (unallocated) when fn is NULL for the element size.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz;
    TCGv_i32 tmode;
    TCGv_ptr status;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* tmode must be a variable temp: set_rmode writes back into it. */
    tmode = tcg_const_i32(mode);
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    /* Install the new mode; the previous mode is returned in tmode. */
    gen_helper_set_rmode(tmode, tmode, status);

    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);

    /* Restore the saved rounding mode. */
    gen_helper_set_rmode(tmode, tmode, status);
    tcg_temp_free_i32(tmode);
    tcg_temp_free_ptr(status);
    return true;
}
4170
/*
 * FRINT<N|P|M|Z|A>: same helpers as FRINTI, but with the rounding mode
 * forced by do_frint_mode for the duration of the operation.
 */
TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
           float_round_nearest_even, frint_fns[a->esz])
TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
           float_round_up, frint_fns[a->esz])
TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
           float_round_down, frint_fns[a->esz])
TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
           float_round_to_zero, frint_fns[a->esz])
TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
           float_round_ties_away, frint_fns[a->esz])
cda3c753 4181
/* FRECPX: floating-point reciprocal exponent; MO_8 is invalid. */
static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
    NULL, gen_helper_sve_frecpx_h,
    gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
};
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/* FSQRT: predicated square root; MO_8 is invalid. */
static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
    NULL, gen_helper_sve_fsqrt_h,
    gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/* SCVTF: signed int->FP; half-precision results use the FP16 fpstatus. */
TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)

/* UCVTF: unsigned int->FP, same fpstatus selection as SCVTF. */
TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
8092c6a3 4229
d1822297
RH
4230/*
4231 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4232 */
4233
/*
 * Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 *
 * @s:    translation context
 * @vofs: byte offset of the destination register within cpu_env
 * @len:  number of bytes to load (a multiple of 2 for predicates)
 * @rn:   base address register number (31 selects SP)
 * @imm:  byte offset added to the base register
 */
static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    /* MTE-check the whole [addr, addr+len) range up front. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        /* Small register: fully unrolled sequence of 8-byte loads. */
        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large register: emit a TCG-level loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        /* Store into cpu_env at vofs + i, then advance i. */
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two tail: one load of the exact width. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: 4-byte load plus 2-byte load, merged. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
4324
/*
 * Similarly for stores: write LEN bytes of the register at VOFS to
 * the address Rn + IMM.  Parameters mirror do_ldr above.
 */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    /* MTE-check the whole [addr, addr+len) range up front. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities. There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        /* Small register: fully unrolled sequence of 8-byte stores. */
        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large register: emit a TCG-level loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        /* Load from cpu_env at vofs + i, then advance i. */
        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two tail: one store of the exact width. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: 4-byte store, then the high 2 bytes. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
4409
3a7be554 4410static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4411{
4412 if (sve_access_check(s)) {
4413 int size = vec_full_reg_size(s);
4414 int off = vec_full_reg_offset(s, a->rd);
4415 do_ldr(s, off, size, a->rn, a->imm * size);
4416 }
4417 return true;
4418}
4419
3a7be554 4420static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4421{
4422 if (sve_access_check(s)) {
4423 int size = pred_full_reg_size(s);
4424 int off = pred_full_reg_offset(s, a->rd);
4425 do_ldr(s, off, size, a->rn, a->imm * size);
4426 }
4427 return true;
4428}
c4e7c493 4429
3a7be554 4430static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4431{
4432 if (sve_access_check(s)) {
4433 int size = vec_full_reg_size(s);
4434 int off = vec_full_reg_offset(s, a->rd);
4435 do_str(s, off, size, a->rn, a->imm * size);
4436 }
4437 return true;
4438}
4439
3a7be554 4440static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4441{
4442 if (sve_access_check(s)) {
4443 int size = pred_full_reg_size(s);
4444 int off = pred_full_reg_offset(s, a->rd);
4445 do_str(s, off, size, a->rn, a->imm * size);
4446 }
4447 return true;
4448}
4449
c4e7c493
RH
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/*
 * The memory mode of the dtype: the 4-bit dtype field of the load
 * encodings indexes both the memory access size/sign (here) and the
 * destination element size (dtype_esz below).
 */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

/* Extract just the memory access size (log2 bytes) from the dtype. */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype. */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4471
/*
 * Expand a predicated contiguous load/store via helper FN.
 *
 * @zt:       first data register number
 * @pg:       governing predicate register number
 * @addr:     (dirty) base address
 * @dtype:    combined access-size/element-size encoding
 * @mte_n:    number of registers transferred (sizes the MTE check)
 * @is_write: true for stores (recorded in the MTE descriptor)
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        int msz = dtype_msz(dtype);

        /* Pack the MTE check parameters above the simd data bits. */
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        /* Without MTE, strip the tag byte at translate time. */
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_pg);
}
4506
/*
 * Contiguous-load helpers, indexed by [mte][be][dtype][nreg], where
 * nreg is the number of registers minus one (LD1..LD4).  NULL slots
 * are dtype/nreg combinations with no instruction encoding.
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
4625
/*
 * Expand an LD1..LD4 contiguous load by selecting the proper helper
 * from ldr_fns for the current mte/endian state.
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];

    /*
     * While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
}
4639
/* LD1..LD4 (scalar plus scalar): address is Rn + Rm << msz. */
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
{
    /* Rm == 31 (XZR, not SP) is an invalid encoding for this form. */
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
4653
/* LD1..LD4 (scalar plus immediate): offset scales with the vector length. */
static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        TCGv_i64 addr = new_tmp_a64(s);

        /* imm is in units of whole (multi-)register transfers. */
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1))
                         << dtype_msz(a->dtype));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
e2654d75 4668
/*
 * LDFF1 (scalar plus scalar): first-fault contiguous load.  Helpers are
 * indexed by [mte][be][dtype]; only single-register forms exist.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4766
/*
 * LDNF1 (scalar plus immediate): non-fault contiguous load.  Helpers
 * are indexed by [mte][be][dtype].  NOTE: the second-level comments
 * previously mislabeled the mte-active halves as "mte inactive";
 * corrected here (the helper names, with the _mte suffix, are right).
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
1a039c7e 4867
/*
 * Load-and-replicate a quadword: load 16 bytes under control of the
 * predicate PG, then broadcast that quadword across the whole vector ZT.
 * Used by the LD1RQ (load and replicate quadword) instructions.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers. */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /* The low 16 predicate bits sit at the high end of the i64 slot. */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        /* Stash the truncated predicate in the scratch predicate slot. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* Index [0] selects the single-register (nreg=1) helper variant. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    tcg_temp_free_ptr(t_pg);

    /* Replicate that first quadword. */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}
4909
3a7be554 4910static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
4911{
4912 if (a->rm == 31) {
4913 return false;
4914 }
4915 if (sve_access_check(s)) {
4916 int msz = dtype_msz(a->dtype);
4917 TCGv_i64 addr = new_tmp_a64(s);
4918 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4919 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 4920 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
4921 }
4922 return true;
4923}
4924
3a7be554 4925static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
4926{
4927 if (sve_access_check(s)) {
4928 TCGv_i64 addr = new_tmp_a64(s);
4929 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 4930 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
4931 }
4932 return true;
4933}
4934
12c563f6
RH
/*
 * Load-and-replicate an octaword (32 bytes) for the LD1RO instructions:
 * load 32 bytes under predicate PG, then replicate them across ZT in
 * 32-byte units, zeroing any trailing non-multiple-of-32 bytes.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers. */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /* The low 32 predicate bits sit at the high end of the i64 slot. */
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        /* Stash the truncated predicate in the scratch predicate slot. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* Index [0] selects the single-register (nreg=1) helper variant. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    vsz -= vsz_r32;
    if (vsz) {
        /* Zero the ragged tail beyond the last full 32-byte unit. */
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}
4997
4998static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
4999{
5000 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5001 return false;
5002 }
5003 if (a->rm == 31) {
5004 return false;
5005 }
5006 if (sve_access_check(s)) {
5007 TCGv_i64 addr = new_tmp_a64(s);
5008 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5009 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5010 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5011 }
5012 return true;
5013}
5014
5015static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5016{
5017 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5018 return false;
5019 }
5020 if (sve_access_check(s)) {
5021 TCGv_i64 addr = new_tmp_a64(s);
5022 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5023 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5024 }
5025 return true;
5026}
5027
/*
 * Load and broadcast element (LD1R): load a single element from
 * rn + imm << msz and replicate it to every element of ZD, then zero
 * the elements inactive in PG.  If PG is all-false the load is skipped
 * entirely (branch over it), so no fault can be taken.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Predicate wider than 64 bits: branch over if no element active. */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
5079
1a039c7e
RH
/*
 * Emit a predicated contiguous store of NREG registers (nreg == 0
 * encodes ST1).  MSZ is the memory element size, ESZ the register
 * element size; for multi-register stores the encoding guarantees
 * msz == esz.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* ST1 helpers, indexed by [mte][be][msz][esz]; NULL where msz > esz. */
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    /* ST2/ST3/ST4 helpers, indexed by [mte][be][nreg - 1][msz]. */
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
5203
3a7be554 5204static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5205{
5206 if (a->rm == 31 || a->msz > a->esz) {
5207 return false;
5208 }
5209 if (sve_access_check(s)) {
5210 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5211 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5212 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5213 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5214 }
5215 return true;
5216}
5217
3a7be554 5218static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5219{
5220 if (a->msz > a->esz) {
5221 return false;
5222 }
5223 if (sve_access_check(s)) {
5224 int vsz = vec_full_reg_size(s);
5225 int elements = vsz >> a->esz;
5226 TCGv_i64 addr = new_tmp_a64(s);
5227
5228 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5229 (a->imm * elements * (a->nreg + 1)) << a->msz);
5230 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5231 }
5232 return true;
5233}
f6dbf62a
RH
5234
5235/*
5236 *** SVE gather loads / scatter stores
5237 */
5238
/*
 * Common expansion for gather loads / scatter stores: call the helper FN
 * with the destination ZT, predicate PG, index vector ZM, and the scalar
 * base.  SCALE and any MTE metadata are packed into the simd descriptor.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    int desc = 0;

    if (s->mte_active[0]) {
        /* Pack the MTE check parameters above SVE_MTEDESC_SHIFT. */
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    }
    /* The low bits of the descriptor data carry the index scale. */
    desc = simd_desc(vsz, vsz, desc | scale);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
}
5268
/* Indexed by [mte][be][ff][xs][u][msz]. */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
5385
/*
 * Indexed by [mte][be][ff][xs][u][msz].
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
5596
3a7be554 5597static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5598{
5599 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5600 bool be = s->be_data == MO_BE;
5601 bool mte = s->mte_active[0];
673e9fa6
RH
5602
5603 if (!sve_access_check(s)) {
5604 return true;
5605 }
5606
5607 switch (a->esz) {
5608 case MO_32:
d28d12f0 5609 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5610 break;
5611 case MO_64:
d28d12f0 5612 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5613 break;
5614 }
5615 assert(fn != NULL);
5616
5617 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5618 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5619 return true;
5620}
5621
3a7be554 5622static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5623{
5624 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5625 bool be = s->be_data == MO_BE;
5626 bool mte = s->mte_active[0];
673e9fa6
RH
5627
5628 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5629 return false;
5630 }
5631 if (!sve_access_check(s)) {
5632 return true;
5633 }
5634
5635 switch (a->esz) {
5636 case MO_32:
d28d12f0 5637 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5638 break;
5639 case MO_64:
d28d12f0 5640 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5641 break;
5642 }
5643 assert(fn != NULL);
5644
5645 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5646 * by loading the immediate into the scalar parameter.
5647 */
2ccdf94f
RH
5648 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5649 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
5650 return true;
5651}
5652
cf327449
SL
5653static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
5654{
b17ab470
RH
5655 gen_helper_gvec_mem_scatter *fn = NULL;
5656 bool be = s->be_data == MO_BE;
5657 bool mte = s->mte_active[0];
5658
5659 if (a->esz < a->msz + !a->u) {
5660 return false;
5661 }
cf327449
SL
5662 if (!dc_isar_feature(aa64_sve2, s)) {
5663 return false;
5664 }
b17ab470
RH
5665 if (!sve_access_check(s)) {
5666 return true;
5667 }
5668
5669 switch (a->esz) {
5670 case MO_32:
5671 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
5672 break;
5673 case MO_64:
5674 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
5675 break;
5676 }
5677 assert(fn != NULL);
5678
5679 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5680 cpu_reg(s, a->rm), a->msz, false, fn);
5681 return true;
cf327449
SL
5682}
5683
/* Indexed by [mte][be][xs][msz]. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
5717
/*
 * Indexed by [mte][be][xs][msz].
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
5775
3a7be554 5776static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 5777{
f6dbf62a 5778 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
5779 bool be = s->be_data == MO_BE;
5780 bool mte = s->mte_active[0];
f6dbf62a
RH
5781
5782 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5783 return false;
5784 }
5785 if (!sve_access_check(s)) {
5786 return true;
5787 }
5788 switch (a->esz) {
5789 case MO_32:
d28d12f0 5790 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
5791 break;
5792 case MO_64:
d28d12f0 5793 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
5794 break;
5795 default:
5796 g_assert_not_reached();
5797 }
5798 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5799 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
5800 return true;
5801}
dec6cf6b 5802
3a7be554 5803static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
5804{
5805 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5806 bool be = s->be_data == MO_BE;
5807 bool mte = s->mte_active[0];
408ecde9
RH
5808
5809 if (a->esz < a->msz) {
5810 return false;
5811 }
5812 if (!sve_access_check(s)) {
5813 return true;
5814 }
5815
5816 switch (a->esz) {
5817 case MO_32:
d28d12f0 5818 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
5819 break;
5820 case MO_64:
d28d12f0 5821 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
5822 break;
5823 }
5824 assert(fn != NULL);
5825
5826 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5827 * by loading the immediate into the scalar parameter.
5828 */
2ccdf94f
RH
5829 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5830 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
5831 return true;
5832}
5833
6ebca45f
SL
5834static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
5835{
b17ab470
RH
5836 gen_helper_gvec_mem_scatter *fn;
5837 bool be = s->be_data == MO_BE;
5838 bool mte = s->mte_active[0];
5839
5840 if (a->esz < a->msz) {
5841 return false;
5842 }
6ebca45f
SL
5843 if (!dc_isar_feature(aa64_sve2, s)) {
5844 return false;
5845 }
b17ab470
RH
5846 if (!sve_access_check(s)) {
5847 return true;
5848 }
5849
5850 switch (a->esz) {
5851 case MO_32:
5852 fn = scatter_store_fn32[mte][be][0][a->msz];
5853 break;
5854 case MO_64:
5855 fn = scatter_store_fn64[mte][be][2][a->msz];
5856 break;
5857 default:
5858 g_assert_not_reached();
5859 }
5860
5861 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5862 cpu_reg(s, a->rm), a->msz, true, fn);
5863 return true;
6ebca45f
SL
5864}
5865
dec6cf6b
RH
5866/*
5867 * Prefetches
5868 */
5869
/*
 * PRF (vector/immediate forms): prefetch hint.
 * QEMU performs no actual prefetching, but sve_access_check() is still
 * invoked for its side effects (presumably raising the SVE-disabled
 * trap when SVE is not enabled — confirm against sve_access_check).
 */
static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}
5876
/*
 * PRF (scalar plus scalar): prefetch hint.
 * rm == 31 is rejected as an invalid encoding for this form
 * (NOTE(review): presumably the unallocated XZR case — confirm
 * against the SVE PRF register-offset encoding).
 */
static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}
a2103582
RH
5886
5887/*
5888 * Move Prefix
5889 *
5890 * TODO: The implementation so far could handle predicated merging movprfx.
5891 * The helper functions as written take an extra source register to
5892 * use in the operation, but the result is only written when predication
5893 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5894 * to allow the final write back to the destination to be unconditional.
5895 * For predicated zeroing movprfx, we need to rearrange the helpers to
5896 * allow the final write back to zero inactives.
5897 *
5898 * In the meantime, just emit the moves.
5899 */
5900
4b0b37e9
RH
5901TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
5902TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
5903TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
5dad1ba5
RH
5904
5905/*
5906 * SVE2 Integer Multiply - Unpredicated
5907 */
5908
b262215b 5909TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
5dad1ba5 5910
bd394cf5
RH
5911static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
5912 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
5913 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
5914};
5915TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5916 smulh_zzz_fns[a->esz], a, 0)
5dad1ba5 5917
bd394cf5
RH
5918static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
5919 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
5920 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
5921};
5922TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5923 umulh_zzz_fns[a->esz], a, 0)
5dad1ba5 5924
bd394cf5
RH
5925TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5926 gen_helper_gvec_pmul_b, a, 0)
5dad1ba5 5927
bd394cf5
RH
5928static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
5929 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
5930 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
5931};
5932TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5933 sqdmulh_zzz_fns[a->esz], a, 0)
169d7c58 5934
bd394cf5
RH
5935static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
5936 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
5937 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
5938};
5939TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5940 sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 5941
d4b1e59d
RH
5942/*
5943 * SVE2 Integer - Predicated
5944 */
5945
5880bdc0
RH
5946static gen_helper_gvec_4 * const sadlp_fns[4] = {
5947 NULL, gen_helper_sve2_sadalp_zpzz_h,
5948 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
5949};
5950TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5951 sadlp_fns[a->esz], a, 0)
d4b1e59d 5952
5880bdc0
RH
5953static gen_helper_gvec_4 * const uadlp_fns[4] = {
5954 NULL, gen_helper_sve2_uadalp_zpzz_h,
5955 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
5956};
5957TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5958 uadlp_fns[a->esz], a, 0)
db366da8
RH
5959
5960/*
5961 * SVE2 integer unary operations (predicated)
5962 */
5963
b2c00961
RH
5964TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
5965 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)
db366da8 5966
b2c00961
RH
5967TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
5968 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
db366da8 5969
b2c00961
RH
5970static gen_helper_gvec_3 * const sqabs_fns[4] = {
5971 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
5972 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
5973};
5974TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)
db366da8 5975
b2c00961
RH
5976static gen_helper_gvec_3 * const sqneg_fns[4] = {
5977 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
5978 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
5979};
5980TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
45d9503d 5981
5880bdc0
RH
5982DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
5983DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
5984DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)
45d9503d 5985
5880bdc0
RH
5986DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
5987DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
5988DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)
a47dc220 5989
5880bdc0
RH
5990DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
5991DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
5992DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)
a47dc220 5993
5880bdc0
RH
5994DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
5995DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
5996DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)
8597dc8b 5997
5880bdc0
RH
5998DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
5999DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
6000DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
6001DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
6002DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)
4f07fbeb 6003
5880bdc0
RH
6004DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
6005DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
6006DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
6007DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
6008DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
6009DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
0ce1dda8
RH
6010
6011/*
6012 * SVE2 Widening Integer Arithmetic
6013 */
6014
615f19fe
RH
6015static gen_helper_gvec_3 * const saddl_fns[4] = {
6016 NULL, gen_helper_sve2_saddl_h,
6017 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
6018};
6019TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6020 saddl_fns[a->esz], a, 0)
6021TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6022 saddl_fns[a->esz], a, 3)
6023TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
6024 saddl_fns[a->esz], a, 2)
6025
6026static gen_helper_gvec_3 * const ssubl_fns[4] = {
6027 NULL, gen_helper_sve2_ssubl_h,
6028 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
6029};
6030TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6031 ssubl_fns[a->esz], a, 0)
6032TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6033 ssubl_fns[a->esz], a, 3)
6034TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
6035 ssubl_fns[a->esz], a, 2)
6036TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
6037 ssubl_fns[a->esz], a, 1)
6038
6039static gen_helper_gvec_3 * const sabdl_fns[4] = {
6040 NULL, gen_helper_sve2_sabdl_h,
6041 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
6042};
6043TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6044 sabdl_fns[a->esz], a, 0)
6045TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6046 sabdl_fns[a->esz], a, 3)
6047
6048static gen_helper_gvec_3 * const uaddl_fns[4] = {
6049 NULL, gen_helper_sve2_uaddl_h,
6050 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
6051};
6052TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6053 uaddl_fns[a->esz], a, 0)
6054TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6055 uaddl_fns[a->esz], a, 3)
6056
6057static gen_helper_gvec_3 * const usubl_fns[4] = {
6058 NULL, gen_helper_sve2_usubl_h,
6059 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
6060};
6061TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6062 usubl_fns[a->esz], a, 0)
6063TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6064 usubl_fns[a->esz], a, 3)
6065
6066static gen_helper_gvec_3 * const uabdl_fns[4] = {
6067 NULL, gen_helper_sve2_uabdl_h,
6068 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
6069};
6070TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6071 uabdl_fns[a->esz], a, 0)
6072TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6073 uabdl_fns[a->esz], a, 3)
6074
6075static gen_helper_gvec_3 * const sqdmull_fns[4] = {
6076 NULL, gen_helper_sve2_sqdmull_zzz_h,
6077 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
6078};
6079TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6080 sqdmull_fns[a->esz], a, 0)
6081TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6082 sqdmull_fns[a->esz], a, 3)
6083
6084static gen_helper_gvec_3 * const smull_fns[4] = {
6085 NULL, gen_helper_sve2_smull_zzz_h,
6086 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
6087};
6088TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6089 smull_fns[a->esz], a, 0)
6090TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6091 smull_fns[a->esz], a, 3)
6092
6093static gen_helper_gvec_3 * const umull_fns[4] = {
6094 NULL, gen_helper_sve2_umull_zzz_h,
6095 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
6096};
6097TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6098 umull_fns[a->esz], a, 0)
6099TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6100 umull_fns[a->esz], a, 3)
6101
6102static gen_helper_gvec_3 * const eoril_fns[4] = {
6103 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6104 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6105};
6106TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
6107TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
2df3ca55 6108
e3a56131
RH
/*
 * Expand PMULLB/PMULLT.  'sel' chooses bottom (false) or top (true)
 * element halves, passed as the helper data argument.
 */
static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL, gen_helper_sve2_pmull_d,
    };
    /*
     * esz == 0 is the 64x64->128 polynomial multiply, which requires
     * the separate aa64_sve2_pmull128 feature.  esz == 2 has no helper
     * (NULL entry) — presumably rejected downstream; confirm in
     * gen_gvec_ool_arg_zzz.
     */
    if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}
6120
615f19fe
RH
6121TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
6122TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
e3a56131 6123
615f19fe
RH
6124static gen_helper_gvec_3 * const saddw_fns[4] = {
6125 NULL, gen_helper_sve2_saddw_h,
6126 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
6127};
6128TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
6129TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
e3a56131 6130
615f19fe
RH
6131static gen_helper_gvec_3 * const ssubw_fns[4] = {
6132 NULL, gen_helper_sve2_ssubw_h,
6133 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
6134};
6135TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
6136TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
81fccf09 6137
615f19fe
RH
6138static gen_helper_gvec_3 * const uaddw_fns[4] = {
6139 NULL, gen_helper_sve2_uaddw_h,
6140 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
6141};
6142TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
6143TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
81fccf09 6144
615f19fe
RH
6145static gen_helper_gvec_3 * const usubw_fns[4] = {
6146 NULL, gen_helper_sve2_usubw_h,
6147 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
6148};
6149TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
6150TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
4269fef1
RH
6151
/*
 * Vector expansion for SSHLLB/SSHLLT: sign-extend the selected half of
 * each element and shift left.  imm packs (shl << 1) | top.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;        /* bits in the narrow half */

    if (top) {
        if (shl == halfbits) {
            /*
             * Shifting the top half left by a full halfbits leaves it
             * in place with the low half zeroed: just mask.
             */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Arithmetic shift down extends the top half, then shift up. */
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        /*
         * Bottom half: move it to the top, then arithmetic-shift back
         * down by (halfbits - shl), sign-extending and shifting at once.
         */
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}
6173
6174static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
6175{
6176 int halfbits = 4 << vece;
6177 int top = imm & 1;
6178 int shl = (imm >> 1);
6179 int shift;
6180 uint64_t mask;
6181
6182 mask = MAKE_64BIT_MASK(0, halfbits);
6183 mask <<= shl;
6184 mask = dup_const(vece, mask);
6185
6186 shift = shl - top * halfbits;
6187 if (shift < 0) {
6188 tcg_gen_shri_i64(d, n, -shift);
6189 } else {
6190 tcg_gen_shli_i64(d, n, shift);
6191 }
6192 tcg_gen_andi_i64(d, d, mask);
6193}
6194
/* Per-element-size bindings of gen_ushll_i64 for the GVecGen2i tables. */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}

/*
 * Vector expansion for USHLLB/USHLLT: zero-extend the selected half of
 * each element and shift left.  imm packs (shl << 1) | top.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;        /* bits in the narrow half */
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            /* Net shift is zero: just mask off the low half. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Logical shift down zero-extends the top half; then shift up. */
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            /* No shift: zero-extension of the low half is just a mask. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Move the low half to the top, then logical shift back down. */
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
6238
/*
 * Common expansion for {S,U}SHLL{B,T}.
 * 'sel' chooses bottom/top halves, 'uns' chooses unsigned (USHLL) vs
 * signed (SSHLL).  The shift amount and half-selector are packed as
 * (imm << 1) | sel for the GVecGen2i callbacks above.
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    /* Indexed by [uns][esz]; esz is the *destination* element size. */
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    /* Byte destination (esz == 0 after decode adjustment) is invalid. */
    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}
6290
/* SSHLLB/SSHLLT/USHLLB/USHLLT: thin wrappers selecting half and sign. */
static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}

static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}

static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}

static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
cb9c33b8 6310
615f19fe
RH
6311static gen_helper_gvec_3 * const bext_fns[4] = {
6312 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
6313 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
6314};
6315TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6316 bext_fns[a->esz], a, 0)
ed4a6387 6317
615f19fe
RH
6318static gen_helper_gvec_3 * const bdep_fns[4] = {
6319 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
6320 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
6321};
6322TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6323 bdep_fns[a->esz], a, 0)
ed4a6387 6324
615f19fe
RH
6325static gen_helper_gvec_3 * const bgrp_fns[4] = {
6326 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
6327 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
6328};
6329TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6330 bgrp_fns[a->esz], a, 0)
ed4a6387 6331
615f19fe
RH
6332static gen_helper_gvec_3 * const cadd_fns[4] = {
6333 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
6334 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
6335};
6336TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6337 cadd_fns[a->esz], a, 0)
6338TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6339 cadd_fns[a->esz], a, 1)
6340
6341static gen_helper_gvec_3 * const sqcadd_fns[4] = {
6342 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
6343 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
6344};
6345TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6346 sqcadd_fns[a->esz], a, 0)
6347TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6348 sqcadd_fns[a->esz], a, 1)
38650638 6349
eeb4e84d
RH
6350static gen_helper_gvec_4 * const sabal_fns[4] = {
6351 NULL, gen_helper_sve2_sabal_h,
6352 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
6353};
6354TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
6355TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
38650638 6356
eeb4e84d
RH
6357static gen_helper_gvec_4 * const uabal_fns[4] = {
6358 NULL, gen_helper_sve2_uabal_h,
6359 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
6360};
6361TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
6362TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
b8295dfb
RH
6363
/*
 * Expand ADCLB/ADCLT (add-with-carry long).  Only 32- and 64-bit
 * element helpers exist; see the note below for the esz encoding.
 */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}
6376
eeb4e84d
RH
6377TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
6378TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e 6379
f2be26a5
RH
6380TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
6381TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
6382TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
6383TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
6384TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
6385TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)
289a1797 6386
79828dcb
RH
6387TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
6388TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
5ff2838d
RH
6389
6390static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
6391 const GVecGen2 ops[3])
6392{
6393 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
6394 !dc_isar_feature(aa64_sve2, s)) {
6395 return false;
6396 }
6397 if (sve_access_check(s)) {
6398 unsigned vsz = vec_full_reg_size(s);
6399 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
6400 vec_full_reg_offset(s, a->rn),
6401 vsz, vsz, &ops[a->esz]);
6402 }
6403 return true;
6404}
6405
/* Host vector ops required by the signed saturating narrow expansion. */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTNB vector expansion: clamp each element to the signed range of
 * its low half, then mask down to that half.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;                /* bits in the narrow half */
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);   /* most negative half value */
    int64_t max = -min - 1;                  /* most positive half value */

    /* Saturate: d = min(max(n, min), max), then keep only the low half. */
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    /* Inline vector expansion with out-of-line fallback per size. */
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6445
/*
 * SQXTNT vector expansion: saturate n to the signed half range, shift
 * it into the high half, and merge with d's existing low half via
 * bitsel (hence .load_dest below).  Clobbers n.
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* d = (t & d) | (~t & n): keep d's low half, take n's high half. */
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6485
/* Host vector ops required by the unsigned saturating narrow expansion. */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

/*
 * UQXTNB vector expansion: unsigned saturation to the low half is a
 * single umin against the half-range maximum (which also clears the
 * high half).
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6519
/*
 * UQXTNT vector expansion: unsigned-saturate n, shift into the high
 * half, and merge with d's low half via bitsel (.load_dest below).
 * Clobbers n.
 */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* t still holds the low-half mask: keep d there, take n above. */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6554
/* Host vector ops for the signed-to-unsigned saturating narrow. */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTUNB vector expansion: clamp signed input to [0, half-max], which
 * also leaves only low-half bits set.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    /* smax against 0 removes negatives; umin against max caps the rest. */
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6590
/*
 * SQXTUNT vector expansion: clamp signed input to [0, half-max], shift
 * into the high half, and merge with d's low half via bitsel
 * (.load_dest below).  Clobbers n.
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* t holds the low-half mask: keep d's low half, take n's high half. */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6627
/*
 * Common expansion for the SVE2 shift-right-and-narrow group
 * (SHRN*, RSHRN*, SQSHRUN*, ...).  The immediate is the shift count,
 * guaranteed by decode to be within [1, element-size-in-bits].
 */
static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
                               const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    /* Decode-time invariant, not a runtime check. */
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
6643
/*
 * SHRNB scalar expansion: shift each wide element right and keep only
 * the low half of the result in each narrow lane.
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

/* Per-element-size bindings for the GVecGen2i .fni8 slots. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

/* Vector form of the same operation.  Clobbers n. */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnb16_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnb32_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnb64_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6702
/*
 * SHRNT scalar expansion: shift each wide element right and deposit
 * the narrow result into the *high* half of each lane of d, keeping
 * d's low halves.  Left-shifting by (halfbits - shr) positions the
 * wanted bits directly in the high half.  Clobbers n.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

/* Per-element-size bindings for the GVecGen2i .fni8 slots. */
static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/* MO_64 case: a single lane, expressible as a 32-bit deposit. */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

/* Vector form: bitsel keeps d's low halves, takes n's high halves. */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnt16_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnt32_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnt64_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6767
/*
 * RSHRNB/RSHRNT: rounding shift-right narrow.  No inline vector
 * expansion is provided; only the out-of-line helpers are used.
 */
static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnb_h },
        { .fno = gen_helper_sve2_rshrnb_s },
        { .fno = gen_helper_sve2_rshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnt_h },
        { .fno = gen_helper_sve2_rshrnt_s },
        { .fno = gen_helper_sve2_rshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6787
/*
 * SQSHRUNB vector expansion: arithmetic shift right, then clamp the
 * signed result to [0, half-max], which also clears the high half.
 * Clobbers n.
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6823
6824static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
6825 TCGv_vec n, int64_t shr)
6826{
6827 TCGv_vec t = tcg_temp_new_vec_matching(d);
6828 int halfbits = 4 << vece;
6829
6830 tcg_gen_sari_vec(vece, n, n, shr);
6831 tcg_gen_dupi_vec(vece, t, 0);
6832 tcg_gen_smax_vec(vece, n, n, t);
6833 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6834 tcg_gen_umin_vec(vece, n, n, t);
6835 tcg_gen_shli_vec(vece, n, n, halfbits);
6836 tcg_gen_bitsel_vec(vece, d, t, d, n);
6837 tcg_temp_free_vec(t);
6838}
6839
6840static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
6841{
6842 static const TCGOpcode vec_list[] = {
6843 INDEX_op_shli_vec, INDEX_op_sari_vec,
6844 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6845 };
6846 static const GVecGen2i ops[3] = {
6847 { .fniv = gen_sqshrunt_vec,
6848 .opt_opc = vec_list,
6849 .load_dest = true,
6850 .fno = gen_helper_sve2_sqshrunt_h,
6851 .vece = MO_16 },
6852 { .fniv = gen_sqshrunt_vec,
6853 .opt_opc = vec_list,
6854 .load_dest = true,
6855 .fno = gen_helper_sve2_sqshrunt_s,
6856 .vece = MO_32 },
6857 { .fniv = gen_sqshrunt_vec,
6858 .opt_opc = vec_list,
6859 .load_dest = true,
6860 .fno = gen_helper_sve2_sqshrunt_d,
6861 .vece = MO_64 },
6862 };
6863 return do_sve2_shr_narrow(s, a, ops);
6864}
6865
6866static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
6867{
6868 static const GVecGen2i ops[3] = {
6869 { .fno = gen_helper_sve2_sqrshrunb_h },
6870 { .fno = gen_helper_sve2_sqrshrunb_s },
6871 { .fno = gen_helper_sve2_sqrshrunb_d },
6872 };
6873 return do_sve2_shr_narrow(s, a, ops);
6874}
6875
6876static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
6877{
6878 static const GVecGen2i ops[3] = {
6879 { .fno = gen_helper_sve2_sqrshrunt_h },
6880 { .fno = gen_helper_sve2_sqrshrunt_s },
6881 { .fno = gen_helper_sve2_sqrshrunt_d },
6882 };
6883 return do_sve2_shr_narrow(s, a, ops);
c13418da
RH
6884}
6885
743bb147
RH
6886static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
6887 TCGv_vec n, int64_t shr)
6888{
6889 TCGv_vec t = tcg_temp_new_vec_matching(d);
6890 int halfbits = 4 << vece;
6891 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6892 int64_t min = -max - 1;
6893
6894 tcg_gen_sari_vec(vece, n, n, shr);
6895 tcg_gen_dupi_vec(vece, t, min);
6896 tcg_gen_smax_vec(vece, n, n, t);
6897 tcg_gen_dupi_vec(vece, t, max);
6898 tcg_gen_smin_vec(vece, n, n, t);
6899 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6900 tcg_gen_and_vec(vece, d, n, t);
6901 tcg_temp_free_vec(t);
6902}
6903
6904static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
6905{
6906 static const TCGOpcode vec_list[] = {
6907 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6908 };
6909 static const GVecGen2i ops[3] = {
6910 { .fniv = gen_sqshrnb_vec,
6911 .opt_opc = vec_list,
6912 .fno = gen_helper_sve2_sqshrnb_h,
6913 .vece = MO_16 },
6914 { .fniv = gen_sqshrnb_vec,
6915 .opt_opc = vec_list,
6916 .fno = gen_helper_sve2_sqshrnb_s,
6917 .vece = MO_32 },
6918 { .fniv = gen_sqshrnb_vec,
6919 .opt_opc = vec_list,
6920 .fno = gen_helper_sve2_sqshrnb_d,
6921 .vece = MO_64 },
6922 };
6923 return do_sve2_shr_narrow(s, a, ops);
6924}
6925
6926static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
6927 TCGv_vec n, int64_t shr)
6928{
6929 TCGv_vec t = tcg_temp_new_vec_matching(d);
6930 int halfbits = 4 << vece;
6931 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6932 int64_t min = -max - 1;
6933
6934 tcg_gen_sari_vec(vece, n, n, shr);
6935 tcg_gen_dupi_vec(vece, t, min);
6936 tcg_gen_smax_vec(vece, n, n, t);
6937 tcg_gen_dupi_vec(vece, t, max);
6938 tcg_gen_smin_vec(vece, n, n, t);
6939 tcg_gen_shli_vec(vece, n, n, halfbits);
6940 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6941 tcg_gen_bitsel_vec(vece, d, t, d, n);
6942 tcg_temp_free_vec(t);
6943}
6944
6945static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
6946{
6947 static const TCGOpcode vec_list[] = {
6948 INDEX_op_shli_vec, INDEX_op_sari_vec,
6949 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6950 };
6951 static const GVecGen2i ops[3] = {
6952 { .fniv = gen_sqshrnt_vec,
6953 .opt_opc = vec_list,
6954 .load_dest = true,
6955 .fno = gen_helper_sve2_sqshrnt_h,
6956 .vece = MO_16 },
6957 { .fniv = gen_sqshrnt_vec,
6958 .opt_opc = vec_list,
6959 .load_dest = true,
6960 .fno = gen_helper_sve2_sqshrnt_s,
6961 .vece = MO_32 },
6962 { .fniv = gen_sqshrnt_vec,
6963 .opt_opc = vec_list,
6964 .load_dest = true,
6965 .fno = gen_helper_sve2_sqshrnt_d,
6966 .vece = MO_64 },
6967 };
6968 return do_sve2_shr_narrow(s, a, ops);
6969}
6970
6971static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
6972{
6973 static const GVecGen2i ops[3] = {
6974 { .fno = gen_helper_sve2_sqrshrnb_h },
6975 { .fno = gen_helper_sve2_sqrshrnb_s },
6976 { .fno = gen_helper_sve2_sqrshrnb_d },
6977 };
6978 return do_sve2_shr_narrow(s, a, ops);
6979}
6980
6981static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
6982{
6983 static const GVecGen2i ops[3] = {
6984 { .fno = gen_helper_sve2_sqrshrnt_h },
6985 { .fno = gen_helper_sve2_sqrshrnt_s },
6986 { .fno = gen_helper_sve2_sqrshrnt_d },
6987 };
6988 return do_sve2_shr_narrow(s, a, ops);
6989}
6990
c13418da
RH
6991static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
6992 TCGv_vec n, int64_t shr)
6993{
6994 TCGv_vec t = tcg_temp_new_vec_matching(d);
6995 int halfbits = 4 << vece;
6996
6997 tcg_gen_shri_vec(vece, n, n, shr);
6998 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6999 tcg_gen_umin_vec(vece, d, n, t);
7000 tcg_temp_free_vec(t);
7001}
7002
7003static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
7004{
7005 static const TCGOpcode vec_list[] = {
7006 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7007 };
7008 static const GVecGen2i ops[3] = {
7009 { .fniv = gen_uqshrnb_vec,
7010 .opt_opc = vec_list,
7011 .fno = gen_helper_sve2_uqshrnb_h,
7012 .vece = MO_16 },
7013 { .fniv = gen_uqshrnb_vec,
7014 .opt_opc = vec_list,
7015 .fno = gen_helper_sve2_uqshrnb_s,
7016 .vece = MO_32 },
7017 { .fniv = gen_uqshrnb_vec,
7018 .opt_opc = vec_list,
7019 .fno = gen_helper_sve2_uqshrnb_d,
7020 .vece = MO_64 },
7021 };
7022 return do_sve2_shr_narrow(s, a, ops);
7023}
7024
7025static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
7026 TCGv_vec n, int64_t shr)
7027{
7028 TCGv_vec t = tcg_temp_new_vec_matching(d);
7029 int halfbits = 4 << vece;
7030
7031 tcg_gen_shri_vec(vece, n, n, shr);
7032 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7033 tcg_gen_umin_vec(vece, n, n, t);
7034 tcg_gen_shli_vec(vece, n, n, halfbits);
7035 tcg_gen_bitsel_vec(vece, d, t, d, n);
7036 tcg_temp_free_vec(t);
7037}
7038
7039static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
7040{
7041 static const TCGOpcode vec_list[] = {
7042 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7043 };
7044 static const GVecGen2i ops[3] = {
7045 { .fniv = gen_uqshrnt_vec,
7046 .opt_opc = vec_list,
7047 .load_dest = true,
7048 .fno = gen_helper_sve2_uqshrnt_h,
7049 .vece = MO_16 },
7050 { .fniv = gen_uqshrnt_vec,
7051 .opt_opc = vec_list,
7052 .load_dest = true,
7053 .fno = gen_helper_sve2_uqshrnt_s,
7054 .vece = MO_32 },
7055 { .fniv = gen_uqshrnt_vec,
7056 .opt_opc = vec_list,
7057 .load_dest = true,
7058 .fno = gen_helper_sve2_uqshrnt_d,
7059 .vece = MO_64 },
7060 };
7061 return do_sve2_shr_narrow(s, a, ops);
7062}
7063
7064static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
7065{
7066 static const GVecGen2i ops[3] = {
7067 { .fno = gen_helper_sve2_uqrshrnb_h },
7068 { .fno = gen_helper_sve2_uqrshrnb_s },
7069 { .fno = gen_helper_sve2_uqrshrnb_d },
7070 };
7071 return do_sve2_shr_narrow(s, a, ops);
7072}
7073
7074static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
7075{
7076 static const GVecGen2i ops[3] = {
7077 { .fno = gen_helper_sve2_uqrshrnt_h },
7078 { .fno = gen_helper_sve2_uqrshrnt_s },
7079 { .fno = gen_helper_sve2_uqrshrnt_d },
7080 };
7081 return do_sve2_shr_narrow(s, a, ops);
5ff2838d 7082}
b87dbeeb 7083
40d5ea50 7084#define DO_SVE2_ZZZ_NARROW(NAME, name) \
bd394cf5 7085 static gen_helper_gvec_3 * const name##_fns[4] = { \
40d5ea50
SL
7086 NULL, gen_helper_sve2_##name##_h, \
7087 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
7088 }; \
bd394cf5
RH
7089 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
7090 name##_fns[a->esz], a, 0)
40d5ea50
SL
7091
7092DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
7093DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
0ea3ff02
SL
7094DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
7095DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
40d5ea50 7096
c3cd6766
SL
7097DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
7098DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
e9443d10
SL
7099DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
7100DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
c3cd6766 7101
ef75309b
RH
7102static gen_helper_gvec_flags_4 * const match_fns[4] = {
7103 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
7104};
7105TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
e0ae6ec3 7106
ef75309b
RH
7107static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
7108 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
7109};
7110TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
e0ae6ec3 7111
5880bdc0
RH
7112static gen_helper_gvec_4 * const histcnt_fns[4] = {
7113 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
7114};
7115TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
7116 histcnt_fns[a->esz], a, 0)
7d47ac94 7117
bd394cf5
RH
7118TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
7119 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7d47ac94 7120
b87dbeeb
SL
7121static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
7122 gen_helper_gvec_4_ptr *fn)
7123{
7124 if (!dc_isar_feature(aa64_sve2, s)) {
7125 return false;
7126 }
7127 return do_zpzz_fp(s, a, fn);
7128}
7129
7130#define DO_SVE2_ZPZZ_FP(NAME, name) \
7131static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
7132{ \
7133 static gen_helper_gvec_4_ptr * const fns[4] = { \
7134 NULL, gen_helper_sve2_##name##_zpzz_h, \
7135 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d \
7136 }; \
7137 return do_sve2_zpzz_fp(s, a, fns[a->esz]); \
7138}
7139
7140DO_SVE2_ZPZZ_FP(FADDP, faddp)
7141DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
7142DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
7143DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
7144DO_SVE2_ZPZZ_FP(FMINP, fminp)
bfc9307e
RH
7145
7146/*
7147 * SVE Integer Multiply-Add (unpredicated)
7148 */
7149
25aee7cc
RH
7150TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
7151 a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
7152TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
7153 a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
4f26756b 7154
eeb4e84d
RH
7155static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
7156 NULL, gen_helper_sve2_sqdmlal_zzzw_h,
7157 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
7158};
7159TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7160 sqdmlal_zzzw_fns[a->esz], a, 0)
7161TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7162 sqdmlal_zzzw_fns[a->esz], a, 3)
7163TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7164 sqdmlal_zzzw_fns[a->esz], a, 2)
7165
7166static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
7167 NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
7168 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
7169};
7170TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7171 sqdmlsl_zzzw_fns[a->esz], a, 0)
7172TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7173 sqdmlsl_zzzw_fns[a->esz], a, 3)
7174TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7175 sqdmlsl_zzzw_fns[a->esz], a, 2)
7176
7177static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
7178 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
7179 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
7180};
7181TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7182 sqrdmlah_fns[a->esz], a, 0)
45a32e80 7183
eeb4e84d
RH
7184static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
7185 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
7186 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
7187};
7188TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7189 sqrdmlsh_fns[a->esz], a, 0)
45a32e80 7190
eeb4e84d
RH
7191static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
7192 NULL, gen_helper_sve2_smlal_zzzw_h,
7193 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
7194};
7195TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7196 smlal_zzzw_fns[a->esz], a, 0)
7197TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7198 smlal_zzzw_fns[a->esz], a, 1)
7199
7200static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
7201 NULL, gen_helper_sve2_umlal_zzzw_h,
7202 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
7203};
7204TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7205 umlal_zzzw_fns[a->esz], a, 0)
7206TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7207 umlal_zzzw_fns[a->esz], a, 1)
7208
7209static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
7210 NULL, gen_helper_sve2_smlsl_zzzw_h,
7211 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
7212};
7213TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7214 smlsl_zzzw_fns[a->esz], a, 0)
7215TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7216 smlsl_zzzw_fns[a->esz], a, 1)
7217
7218static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
7219 NULL, gen_helper_sve2_umlsl_zzzw_h,
7220 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
7221};
7222TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7223 umlsl_zzzw_fns[a->esz], a, 0)
7224TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7225 umlsl_zzzw_fns[a->esz], a, 1)
d782d3ca 7226
5f425b92
RH
7227static gen_helper_gvec_4 * const cmla_fns[] = {
7228 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
7229 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
7230};
7231TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7232 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
21068f39 7233
5f425b92
RH
7234static gen_helper_gvec_4 * const cdot_fns[] = {
7235 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
7236};
7237TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7238 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
d782d3ca 7239
5f425b92
RH
7240static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
7241 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
7242 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
7243};
7244TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7245 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
6a98cb2a 7246
8740d694
RH
7247TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7248 a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
b2bcd1be 7249
0ea3cdbf
RH
7250TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
7251 gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
3cc7a88e 7252
32e2ad65
RH
7253TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7254 gen_helper_crypto_aese, a, false)
7255TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7256 gen_helper_crypto_aese, a, true)
3cc7a88e 7257
32e2ad65
RH
7258TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7259 gen_helper_crypto_sm4e, a, 0)
7260TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7261 gen_helper_crypto_sm4ekey, a, 0)
3358eb3f 7262
2aa469ff 7263TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
5c1b7226 7264
0360730c
RH
7265TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
7266 gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
7267TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
7268 gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)
83c2523f 7269
0360730c
RH
7270TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
7271 gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)
83c2523f 7272
0360730c
RH
7273TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
7274 gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
7275TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
7276 gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)
95365277 7277
27645836
RH
7278TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
7279 float_round_to_odd, gen_helper_sve_fcvt_ds)
7280TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
7281 float_round_to_odd, gen_helper_sve2_fcvtnt_ds)
631be02e
SL
7282
7283static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
7284{
7285 static gen_helper_gvec_3_ptr * const fns[] = {
7286 NULL, gen_helper_flogb_h,
7287 gen_helper_flogb_s, gen_helper_flogb_d
7288 };
7289
7290 if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
7291 return false;
7292 }
7293 if (sve_access_check(s)) {
7294 TCGv_ptr status =
7295 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7296 unsigned vsz = vec_full_reg_size(s);
7297
7298 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
7299 vec_full_reg_offset(s, a->rn),
7300 pred_full_reg_offset(s, a->pg),
7301 status, vsz, vsz, 0, fns[a->esz]);
7302 tcg_temp_free_ptr(status);
7303 }
7304 return true;
7305}
50d102bd
SL
7306
7307static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
7308{
7309 if (!dc_isar_feature(aa64_sve2, s)) {
7310 return false;
7311 }
41bf9b67
RH
7312 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
7313 a->rd, a->rn, a->rm, a->ra,
7314 (sel << 1) | sub, cpu_env);
50d102bd
SL
7315}
7316
7317static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7318{
7319 return do_FMLAL_zzzw(s, a, false, false);
7320}
7321
7322static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7323{
7324 return do_FMLAL_zzzw(s, a, false, true);
7325}
7326
7327static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7328{
7329 return do_FMLAL_zzzw(s, a, true, false);
7330}
7331
7332static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7333{
7334 return do_FMLAL_zzzw(s, a, true, true);
7335}
7336
7337static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
7338{
7339 if (!dc_isar_feature(aa64_sve2, s)) {
7340 return false;
7341 }
41bf9b67
RH
7342 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
7343 a->rd, a->rn, a->rm, a->ra,
7344 (a->index << 2) | (sel << 1) | sub, cpu_env);
50d102bd
SL
7345}
7346
7347static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
7348{
7349 return do_FMLAL_zzxw(s, a, false, false);
7350}
7351
7352static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
7353{
7354 return do_FMLAL_zzxw(s, a, false, true);
7355}
7356
7357static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
7358{
7359 return do_FMLAL_zzxw(s, a, true, false);
7360}
7361
7362static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
7363{
7364 return do_FMLAL_zzxw(s, a, true, true);
7365}
2323c5ff 7366
eec05e4e
RH
7367TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7368 gen_helper_gvec_smmla_b, a, 0)
7369TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7370 gen_helper_gvec_usmmla_b, a, 0)
7371TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7372 gen_helper_gvec_ummla_b, a, 0)
cb8657f7 7373
eec05e4e
RH
7374TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
7375 gen_helper_gvec_bfdot, a, 0)
f3500a25
RH
7376TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
7377 gen_helper_gvec_bfdot_idx, a)
81266a1f 7378
eec05e4e
RH
7379TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
7380 gen_helper_gvec_bfmmla, a, 0)
5693887f
RH
7381
7382static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7383{
41bf9b67
RH
7384 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
7385 a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
5693887f
RH
7386}
7387
698ddb9d
RH
7388TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
7389TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)
458d0ab6
RH
7390
7391static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
7392{
41bf9b67
RH
7393 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
7394 a->rd, a->rn, a->rm, a->ra,
7395 (a->index << 1) | sel, FPST_FPCR);
458d0ab6
RH
7396}
7397
698ddb9d
RH
7398TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
7399TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)