target/arm/translate-sve.c
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
33#include "translate-a64.h"
34#include "fpu/softfloat.h"
35
36
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
48
49/*
50 * Helpers for extracting complex instruction fields.
51 */
52
53/* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
56static int tszimm_esz(DisasContext *s, int x)
57{
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60}
61
62static int tszimm_shr(DisasContext *s, int x)
63{
64 return (16 << tszimm_esz(s, x)) - x;
65}
66
67/* See e.g. LSL (immediate, predicated). */
68static int tszimm_shl(DisasContext *s, int x)
69{
70 return x - (8 << tszimm_esz(s, x));
71}
72
73/* The SH bit is in bit 8.  Extract the low 8 and shift. */
74static inline int expand_imm_sh8s(DisasContext *s, int x)
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
79static inline int expand_imm_sh8u(DisasContext *s, int x)
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
86 */
87static inline int msz_dtype(DisasContext *s, int msz)
88{
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
91}
92
93/*
94 * Include the generated decoder.
95 */
96
97#include "decode-sve.c.inc"
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
103/* Return the offset into CPUARMState of the predicate vector register Pn.
104 * Note for this purpose, FFR is P16.
105 */
106static inline int pred_full_reg_offset(DisasContext *s, int regno)
107{
108 return offsetof(CPUARMState, vfp.pregs[regno]);
109}
110
111/* Return the byte size of the whole predicate register, VL / 64. */
112static inline int pred_full_reg_size(DisasContext *s)
113{
114 return s->sve_len >> 3;
115}
116
117/* Round up the size of a register to a size allowed by
118 * the tcg vector infrastructure. Any operation which uses this
119 * size may assume that the bits above pred_full_reg_size are zero,
120 * and must leave them the same way.
121 *
122 * Note that this is not needed for the vector registers as they
123 * are always properly sized for tcg vectors.
124 */
125static int size_for_gvec(int size)
126{
127 if (size <= 8) {
128 return 8;
129 } else {
130 return QEMU_ALIGN_UP(size, 16);
131 }
132}
133
134static int pred_gvec_reg_size(DisasContext *s)
135{
136 return size_for_gvec(pred_full_reg_size(s));
137}
138
139/* Invoke an out-of-line helper on 2 Zregs. */
140static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
141 int rd, int rn, int data)
142{
143 if (fn == NULL) {
144 return false;
145 }
146 if (sve_access_check(s)) {
147 unsigned vsz = vec_full_reg_size(s);
148 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
149 vec_full_reg_offset(s, rn),
150 vsz, vsz, data, fn);
151 }
152 return true;
153}
154
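/* Invoke an out-of-line helper on 2 Zregs, plus float_status. */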
155static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
156 int rd, int rn, int data,
157 ARMFPStatusFlavour flavour)
158{
159 if (fn == NULL) {
160 return false;
161 }
162 if (sve_access_check(s)) {
163 unsigned vsz = vec_full_reg_size(s);
164 TCGv_ptr status = fpstatus_ptr(flavour);
165
166 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
167 vec_full_reg_offset(s, rn),
168 status, vsz, vsz, data, fn);
169 tcg_temp_free_ptr(status);
170 }
171 return true;
172}
173
174static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
175 arg_rr_esz *a, int data)
176{
177 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
178 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
179}
180
181/* Invoke an out-of-line helper on 3 Zregs. */
182static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
183 int rd, int rn, int rm, int data)
184{
185 if (fn == NULL) {
186 return false;
187 }
188 if (sve_access_check(s)) {
189 unsigned vsz = vec_full_reg_size(s);
190 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
191 vec_full_reg_offset(s, rn),
192 vec_full_reg_offset(s, rm),
193 vsz, vsz, data, fn);
194 }
195 return true;
196}
197
198static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
199 arg_rrr_esz *a, int data)
200{
201 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
202}
203
204/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
205static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
206 int rd, int rn, int rm,
207 int data, ARMFPStatusFlavour flavour)
208{
209 if (fn == NULL) {
210 return false;
211 }
212 if (sve_access_check(s)) {
213 unsigned vsz = vec_full_reg_size(s);
214 TCGv_ptr status = fpstatus_ptr(flavour);
215
216 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
217 vec_full_reg_offset(s, rn),
218 vec_full_reg_offset(s, rm),
219 status, vsz, vsz, data, fn);
220
221 tcg_temp_free_ptr(status);
222 }
223 return true;
224}
225
226static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
227 arg_rrr_esz *a, int data)
228{
229 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
230 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
231}
232
233/* Invoke an out-of-line helper on 4 Zregs. */
234static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
235 int rd, int rn, int rm, int ra, int data)
236{
237 if (fn == NULL) {
238 return false;
239 }
240 if (sve_access_check(s)) {
241 unsigned vsz = vec_full_reg_size(s);
242 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
243 vec_full_reg_offset(s, rn),
244 vec_full_reg_offset(s, rm),
245 vec_full_reg_offset(s, ra),
246 vsz, vsz, data, fn);
247 }
248 return true;
249}
250
251static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
252 arg_rrrr_esz *a, int data)
253{
254 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
255}
256
257static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
258 arg_rrxr_esz *a)
259{
260 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
261}
262
263/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
264static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
265 int rd, int rn, int rm, int ra,
266 int data, TCGv_ptr ptr)
267{
268 if (fn == NULL) {
269 return false;
270 }
271 if (sve_access_check(s)) {
272 unsigned vsz = vec_full_reg_size(s);
273 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
274 vec_full_reg_offset(s, rn),
275 vec_full_reg_offset(s, rm),
276 vec_full_reg_offset(s, ra),
277 ptr, vsz, vsz, data, fn);
278 }
279 return true;
280}
281
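/* Invoke an out-of-line helper on 4 Zregs, plus float_status. */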
282static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
283 int rd, int rn, int rm, int ra,
284 int data, ARMFPStatusFlavour flavour)
285{
286 TCGv_ptr status = fpstatus_ptr(flavour);
287 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
288 tcg_temp_free_ptr(status);
289 return ret;
290}
291
292/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
293static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
294 int rd, int rn, int rm, int ra, int pg,
295 int data, ARMFPStatusFlavour flavour)
296{
297 if (fn == NULL) {
298 return false;
299 }
300 if (sve_access_check(s)) {
301 unsigned vsz = vec_full_reg_size(s);
302 TCGv_ptr status = fpstatus_ptr(flavour);
303
304 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
305 vec_full_reg_offset(s, rn),
306 vec_full_reg_offset(s, rm),
307 vec_full_reg_offset(s, ra),
308 pred_full_reg_offset(s, pg),
309 status, vsz, vsz, data, fn);
310
311 tcg_temp_free_ptr(status);
312 }
313 return true;
314}
315
316/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
317static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
318 int rd, int rn, int pg, int data)
319{
320 if (fn == NULL) {
321 return false;
322 }
323 if (sve_access_check(s)) {
324 unsigned vsz = vec_full_reg_size(s);
325 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
326 vec_full_reg_offset(s, rn),
327 pred_full_reg_offset(s, pg),
328 vsz, vsz, data, fn);
329 }
330 return true;
331}
332
333static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
334 arg_rpr_esz *a, int data)
335{
336 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
337}
338
339static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
340 arg_rpri_esz *a)
341{
342 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
343}
344
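/* Invoke an out-of-line helper on 2 Zregs and a predicate, plus float_status. */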
345static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
346 int rd, int rn, int pg, int data,
347 ARMFPStatusFlavour flavour)
348{
349 if (fn == NULL) {
350 return false;
351 }
352 if (sve_access_check(s)) {
353 unsigned vsz = vec_full_reg_size(s);
354 TCGv_ptr status = fpstatus_ptr(flavour);
355
356 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
357 vec_full_reg_offset(s, rn),
358 pred_full_reg_offset(s, pg),
359 status, vsz, vsz, data, fn);
360 tcg_temp_free_ptr(status);
361 }
362 return true;
363}
364
365static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
366 arg_rpr_esz *a, int data,
367 ARMFPStatusFlavour flavour)
368{
369 return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
370}
371
372/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
373static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
374 int rd, int rn, int rm, int pg, int data)
375{
376 if (fn == NULL) {
377 return false;
378 }
379 if (sve_access_check(s)) {
380 unsigned vsz = vec_full_reg_size(s);
381 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
382 vec_full_reg_offset(s, rn),
383 vec_full_reg_offset(s, rm),
384 pred_full_reg_offset(s, pg),
385 vsz, vsz, data, fn);
386 }
387 return true;
388}
389
390static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
391 arg_rprr_esz *a, int data)
392{
393 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
394}
395
396/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus float_status. */
397static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
398 int rd, int rn, int rm, int pg, int data,
399 ARMFPStatusFlavour flavour)
400{
401 if (fn == NULL) {
402 return false;
403 }
404 if (sve_access_check(s)) {
405 unsigned vsz = vec_full_reg_size(s);
406 TCGv_ptr status = fpstatus_ptr(flavour);
407
408 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
409 vec_full_reg_offset(s, rn),
410 vec_full_reg_offset(s, rm),
411 pred_full_reg_offset(s, pg),
412 status, vsz, vsz, data, fn);
413 tcg_temp_free_ptr(status);
414 }
415 return true;
416}
417
418static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
419 arg_rprr_esz *a)
420{
421 return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
422 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
423}
424
425/* Invoke a vector expander on two Zregs and an immediate. */
426static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
427 int esz, int rd, int rn, uint64_t imm)
428{
429 if (gvec_fn == NULL) {
430 return false;
431 }
432 if (sve_access_check(s)) {
433 unsigned vsz = vec_full_reg_size(s);
434 gvec_fn(esz, vec_full_reg_offset(s, rd),
435 vec_full_reg_offset(s, rn), imm, vsz, vsz);
436 }
437 return true;
438}
439
440static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
441 arg_rri_esz *a)
442{
443 if (a->esz < 0) {
444 /* Invalid tsz encoding -- see tszimm_esz. */
445 return false;
446 }
447 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
448}
449
450/* Invoke a vector expander on three Zregs. */
451static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
452 int esz, int rd, int rn, int rm)
453{
454 if (gvec_fn == NULL) {
455 return false;
456 }
457 if (sve_access_check(s)) {
458 unsigned vsz = vec_full_reg_size(s);
459 gvec_fn(esz, vec_full_reg_offset(s, rd),
460 vec_full_reg_offset(s, rn),
461 vec_full_reg_offset(s, rm), vsz, vsz);
462 }
463 return true;
464}
465
466static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
467 arg_rrr_esz *a)
468{
469 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
470}
471
472/* Invoke a vector expander on four Zregs. */
473static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
474 arg_rrrr_esz *a)
475{
476 if (gvec_fn == NULL) {
477 return false;
478 }
479 if (sve_access_check(s)) {
480 unsigned vsz = vec_full_reg_size(s);
481 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
482 vec_full_reg_offset(s, a->rn),
483 vec_full_reg_offset(s, a->rm),
484 vec_full_reg_offset(s, a->ra), vsz, vsz);
485 }
486 return true;
487}
488
489/* Invoke a vector move on two Zregs. */
490static bool do_mov_z(DisasContext *s, int rd, int rn)
491{
492 if (sve_access_check(s)) {
493 unsigned vsz = vec_full_reg_size(s);
494 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
495 vec_full_reg_offset(s, rn), vsz, vsz);
496 }
497 return true;
498}
499
500/* Initialize a Zreg with replications of a 64-bit immediate. */
501static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
502{
503 unsigned vsz = vec_full_reg_size(s);
504 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
505}
506
507/* Invoke a vector expander on three Pregs. */
508static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
509 int rd, int rn, int rm)
510{
511 if (sve_access_check(s)) {
512 unsigned psz = pred_gvec_reg_size(s);
513 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
514 pred_full_reg_offset(s, rn),
515 pred_full_reg_offset(s, rm), psz, psz);
516 }
517 return true;
518}
519
520/* Invoke a vector move on two Pregs. */
521static bool do_mov_p(DisasContext *s, int rd, int rn)
522{
523 if (sve_access_check(s)) {
524 unsigned psz = pred_gvec_reg_size(s);
525 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
526 pred_full_reg_offset(s, rn), psz, psz);
527 }
528 return true;
529}
530
531/* Set the cpu flags as per a return from an SVE helper. */
532static void do_pred_flags(TCGv_i32 t)
533{
534 tcg_gen_mov_i32(cpu_NF, t);
535 tcg_gen_andi_i32(cpu_ZF, t, 2);
536 tcg_gen_andi_i32(cpu_CF, t, 1);
537 tcg_gen_movi_i32(cpu_VF, 0);
538}
539
540/* Subroutines computing the ARM PredTest pseudofunction. */
541static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
542{
543 TCGv_i32 t = tcg_temp_new_i32();
544
545 gen_helper_sve_predtest1(t, d, g);
546 do_pred_flags(t);
547 tcg_temp_free_i32(t);
548}
549
550static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
551{
552 TCGv_ptr dptr = tcg_temp_new_ptr();
553 TCGv_ptr gptr = tcg_temp_new_ptr();
554 TCGv_i32 t = tcg_temp_new_i32();
555
556 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
557 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
558
559 gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
560 tcg_temp_free_ptr(dptr);
561 tcg_temp_free_ptr(gptr);
562
563 do_pred_flags(t);
564 tcg_temp_free_i32(t);
565}
566
567/* For each element size, the bits within a predicate word that are active. */
568const uint64_t pred_esz_masks[4] = {
569 0xffffffffffffffffull, 0x5555555555555555ull,
570 0x1111111111111111ull, 0x0101010101010101ull
571};
572
573static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
574{
575 unallocated_encoding(s);
576 return true;
577}
578
579/*
580 *** SVE Logical - Unpredicated Group
581 */
582
b262215b
RH
583TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
584TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
585TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
586TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
587
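/*
 * XAR: exclusive-or the two inputs, then rotate each element right by an
 * immediate. The 8- and 16-bit cases below synthesize the per-element
 * rotate from shifts and masks within a 64-bit lane.
 */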
588static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
589{
590 TCGv_i64 t = tcg_temp_new_i64();
591 uint64_t mask = dup_const(MO_8, 0xff >> sh);
592
593 tcg_gen_xor_i64(t, n, m);
594 tcg_gen_shri_i64(d, t, sh);
595 tcg_gen_shli_i64(t, t, 8 - sh);
596 tcg_gen_andi_i64(d, d, mask);
597 tcg_gen_andi_i64(t, t, ~mask);
598 tcg_gen_or_i64(d, d, t);
599 tcg_temp_free_i64(t);
600}
601
602static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
603{
604 TCGv_i64 t = tcg_temp_new_i64();
605 uint64_t mask = dup_const(MO_16, 0xffff >> sh);
606
607 tcg_gen_xor_i64(t, n, m);
608 tcg_gen_shri_i64(d, t, sh);
609 tcg_gen_shli_i64(t, t, 16 - sh);
610 tcg_gen_andi_i64(d, d, mask);
611 tcg_gen_andi_i64(t, t, ~mask);
612 tcg_gen_or_i64(d, d, t);
613 tcg_temp_free_i64(t);
614}
615
616static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
617{
618 tcg_gen_xor_i32(d, n, m);
619 tcg_gen_rotri_i32(d, d, sh);
620}
621
622static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
623{
624 tcg_gen_xor_i64(d, n, m);
625 tcg_gen_rotri_i64(d, d, sh);
626}
627
628static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
629 TCGv_vec m, int64_t sh)
630{
631 tcg_gen_xor_vec(vece, d, n, m);
632 tcg_gen_rotri_vec(vece, d, d, sh);
633}
634
635void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
636 uint32_t rm_ofs, int64_t shift,
637 uint32_t opr_sz, uint32_t max_sz)
638{
639 static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
640 static const GVecGen3i ops[4] = {
641 { .fni8 = gen_xar8_i64,
642 .fniv = gen_xar_vec,
643 .fno = gen_helper_sve2_xar_b,
644 .opt_opc = vecop,
645 .vece = MO_8 },
646 { .fni8 = gen_xar16_i64,
647 .fniv = gen_xar_vec,
648 .fno = gen_helper_sve2_xar_h,
649 .opt_opc = vecop,
650 .vece = MO_16 },
651 { .fni4 = gen_xar_i32,
652 .fniv = gen_xar_vec,
653 .fno = gen_helper_sve2_xar_s,
654 .opt_opc = vecop,
655 .vece = MO_32 },
656 { .fni8 = gen_xar_i64,
657 .fniv = gen_xar_vec,
658 .fno = gen_helper_gvec_xar_d,
659 .opt_opc = vecop,
660 .vece = MO_64 }
661 };
662 int esize = 8 << vece;
663
664 /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
665 tcg_debug_assert(shift >= 0);
666 tcg_debug_assert(shift <= esize);
667 shift &= esize - 1;
668
669 if (shift == 0) {
670 /* xar with no rotate devolves to xor. */
671 tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
672 } else {
673 tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
674 shift, &ops[vece]);
675 }
676}
677
678static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
679{
680 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
681 return false;
682 }
683 if (sve_access_check(s)) {
684 unsigned vsz = vec_full_reg_size(s);
685 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
686 vec_full_reg_offset(s, a->rn),
687 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
688 }
689 return true;
690}
691
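/* EOR3: three-way exclusive OR, d = n ^ m ^ k. */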
692static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
693{
694 tcg_gen_xor_i64(d, n, m);
695 tcg_gen_xor_i64(d, d, k);
696}
697
698static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
699 TCGv_vec m, TCGv_vec k)
700{
701 tcg_gen_xor_vec(vece, d, n, m);
702 tcg_gen_xor_vec(vece, d, d, k);
703}
704
705static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
706 uint32_t a, uint32_t oprsz, uint32_t maxsz)
707{
708 static const GVecGen4 op = {
709 .fni8 = gen_eor3_i64,
710 .fniv = gen_eor3_vec,
711 .fno = gen_helper_sve2_eor3,
712 .vece = MO_64,
713 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
714 };
715 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
716}
717
718TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)
719
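/* BCAX: bit clear and exclusive OR, d = n ^ (m & ~k). */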
720static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
721{
722 tcg_gen_andc_i64(d, m, k);
723 tcg_gen_xor_i64(d, d, n);
724}
725
726static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
727 TCGv_vec m, TCGv_vec k)
728{
729 tcg_gen_andc_vec(vece, d, m, k);
730 tcg_gen_xor_vec(vece, d, d, n);
731}
732
733static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
734 uint32_t a, uint32_t oprsz, uint32_t maxsz)
735{
736 static const GVecGen4 op = {
737 .fni8 = gen_bcax_i64,
738 .fniv = gen_bcax_vec,
739 .fno = gen_helper_sve2_bcax,
740 .vece = MO_64,
741 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
742 };
743 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
744}
745
746TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)
747
748static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
749 uint32_t a, uint32_t oprsz, uint32_t maxsz)
750{
751 /* BSL differs from the generic bitsel in argument ordering. */
752 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
753}
754
755TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
756
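/* BSL1N: as BSL above, but with the first data operand inverted before selection. */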
757static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
758{
759 tcg_gen_andc_i64(n, k, n);
760 tcg_gen_andc_i64(m, m, k);
761 tcg_gen_or_i64(d, n, m);
762}
763
764static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
765 TCGv_vec m, TCGv_vec k)
766{
767 if (TCG_TARGET_HAS_bitsel_vec) {
768 tcg_gen_not_vec(vece, n, n);
769 tcg_gen_bitsel_vec(vece, d, k, n, m);
770 } else {
771 tcg_gen_andc_vec(vece, n, k, n);
772 tcg_gen_andc_vec(vece, m, m, k);
773 tcg_gen_or_vec(vece, d, n, m);
774 }
775}
776
777static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
778 uint32_t a, uint32_t oprsz, uint32_t maxsz)
779{
780 static const GVecGen4 op = {
781 .fni8 = gen_bsl1n_i64,
782 .fniv = gen_bsl1n_vec,
783 .fno = gen_helper_sve2_bsl1n,
784 .vece = MO_64,
785 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
786 };
787 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
788}
789
790TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
791
792static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
793{
794 /*
795 * Z[dn] = (n & k) | (~m & ~k)
796 * = (n & k) | ~(m | k)
797 */
798 tcg_gen_and_i64(n, n, k);
799 if (TCG_TARGET_HAS_orc_i64) {
800 tcg_gen_or_i64(m, m, k);
801 tcg_gen_orc_i64(d, n, m);
802 } else {
803 tcg_gen_nor_i64(m, m, k);
804 tcg_gen_or_i64(d, n, m);
805 }
806}
807
808static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
809 TCGv_vec m, TCGv_vec k)
810{
811 if (TCG_TARGET_HAS_bitsel_vec) {
812 tcg_gen_not_vec(vece, m, m);
813 tcg_gen_bitsel_vec(vece, d, k, n, m);
814 } else {
815 tcg_gen_and_vec(vece, n, n, k);
816 tcg_gen_or_vec(vece, m, m, k);
817 tcg_gen_orc_vec(vece, d, n, m);
818 }
819}
820
821static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
822 uint32_t a, uint32_t oprsz, uint32_t maxsz)
823{
824 static const GVecGen4 op = {
825 .fni8 = gen_bsl2n_i64,
826 .fniv = gen_bsl2n_vec,
827 .fno = gen_helper_sve2_bsl2n,
828 .vece = MO_64,
829 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
830 };
831 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
832}
833
834TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
835
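/* NBSL: bitwise select as in BSL, with the final result inverted. */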
836static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
837{
838 tcg_gen_and_i64(n, n, k);
839 tcg_gen_andc_i64(m, m, k);
840 tcg_gen_nor_i64(d, n, m);
841}
842
843static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
844 TCGv_vec m, TCGv_vec k)
845{
846 tcg_gen_bitsel_vec(vece, d, k, n, m);
847 tcg_gen_not_vec(vece, d, d);
848}
849
850static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
851 uint32_t a, uint32_t oprsz, uint32_t maxsz)
852{
853 static const GVecGen4 op = {
854 .fni8 = gen_nbsl_i64,
855 .fniv = gen_nbsl_vec,
856 .fno = gen_helper_sve2_nbsl,
857 .vece = MO_64,
858 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
859 };
860 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
861}
862
863TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
864
865/*
866 *** SVE Integer Arithmetic - Unpredicated Group
867 */
868
869TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
870TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
871TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
872TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
873TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
874TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
875
876/*
877 *** SVE Integer Arithmetic - Binary Predicated Group
878 */
879
880/* Select active elements from Zn and inactive elements from Zm,
881 * storing the result in Zd.
882 */
883static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
884{
885 static gen_helper_gvec_4 * const fns[4] = {
886 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
887 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
888 };
889 return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
890}
891
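/*
 * Expand a predicated integer binary operation via the out-of-line helper
 * selected by element size from a per-size table.
 */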
892#define DO_ZPZZ(NAME, FEAT, name) \
893 static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \
894 gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \
895 gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \
896 }; \
897 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \
898 name##_zpzz_fns[a->esz], a, 0)
899
900DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
901DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
902DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
903DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)
904
905DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
906DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)
907
908DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
909DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
910DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
911DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
912DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
913DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)
914
915DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
916DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
917DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)
918
919DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
920DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
921DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)
922
923static gen_helper_gvec_4 * const sdiv_fns[4] = {
924 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
925};
926TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)
927
928static gen_helper_gvec_4 * const udiv_fns[4] = {
929 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
930};
931TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)
932
933TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
934
935/*
936 *** SVE Integer Arithmetic - Unary Predicated Group
937 */
938
939#define DO_ZPZ(NAME, FEAT, name) \
940 static gen_helper_gvec_3 * const name##_fns[4] = { \
941 gen_helper_##name##_b, gen_helper_##name##_h, \
942 gen_helper_##name##_s, gen_helper_##name##_d, \
943 }; \
944 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)
945
946DO_ZPZ(CLS, aa64_sve, sve_cls)
947DO_ZPZ(CLZ, aa64_sve, sve_clz)
948DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
949DO_ZPZ(CNOT, aa64_sve, sve_cnot)
950DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
951DO_ZPZ(ABS, aa64_sve, sve_abs)
952DO_ZPZ(NEG, aa64_sve, sve_neg)
953DO_ZPZ(RBIT, aa64_sve, sve_rbit)
954
955static gen_helper_gvec_3 * const fabs_fns[4] = {
956 NULL, gen_helper_sve_fabs_h,
957 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
958};
959TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
960
961static gen_helper_gvec_3 * const fneg_fns[4] = {
962 NULL, gen_helper_sve_fneg_h,
963 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
964};
965TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
966
967static gen_helper_gvec_3 * const sxtb_fns[4] = {
968 NULL, gen_helper_sve_sxtb_h,
969 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
970};
971TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)
972
973static gen_helper_gvec_3 * const uxtb_fns[4] = {
974 NULL, gen_helper_sve_uxtb_h,
975 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
976};
977TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)
978
979static gen_helper_gvec_3 * const sxth_fns[4] = {
980 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
981};
982TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)
983
984static gen_helper_gvec_3 * const uxth_fns[4] = {
985 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
986};
987TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)
988
989TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
990 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
991TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
992 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
993
994/*
995 *** SVE Integer Reduction Group
996 */
997
998typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
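/*
 * Reduce the active elements of Zn, under predicate Pg, to a single
 * 64-bit scalar written to Vd.
 */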
999static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
1000 gen_helper_gvec_reduc *fn)
1001{
1002 unsigned vsz = vec_full_reg_size(s);
1003 TCGv_ptr t_zn, t_pg;
1004 TCGv_i32 desc;
1005 TCGv_i64 temp;
1006
1007 if (fn == NULL) {
1008 return false;
1009 }
1010 if (!sve_access_check(s)) {
1011 return true;
1012 }
1013
1014 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
1015 temp = tcg_temp_new_i64();
1016 t_zn = tcg_temp_new_ptr();
1017 t_pg = tcg_temp_new_ptr();
1018
1019 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
1020 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
1021 fn(temp, t_zn, t_pg, desc);
1022 tcg_temp_free_ptr(t_zn);
1023 tcg_temp_free_ptr(t_pg);
1024
1025 write_fp_dreg(s, a->rd, temp);
1026 tcg_temp_free_i64(temp);
1027 return true;
1028}
1029
1030#define DO_VPZ(NAME, name) \
1031 static gen_helper_gvec_reduc * const name##_fns[4] = { \
1032 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
1033 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
1034 }; \
1035 TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])
1036
1037DO_VPZ(ORV, orv)
1038DO_VPZ(ANDV, andv)
1039DO_VPZ(EORV, eorv)
1040
1041DO_VPZ(UADDV, uaddv)
1042DO_VPZ(SMAXV, smaxv)
1043DO_VPZ(UMAXV, umaxv)
1044DO_VPZ(SMINV, sminv)
1045DO_VPZ(UMINV, uminv)
1046
1047static gen_helper_gvec_reduc * const saddv_fns[4] = {
1048 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
1049 gen_helper_sve_saddv_s, NULL
1050};
1051TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])
1052
1053#undef DO_VPZ
1054
1055/*
1056 *** SVE Shift by Immediate - Predicated Group
1057 */
1058
1059/*
1060 * Copy Zn into Zd, storing zeros into inactive elements.
1061 * If invert, store zeros into the active elements.
1062 */
1063static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
1064 int esz, bool invert)
1065{
1066 static gen_helper_gvec_3 * const fns[4] = {
1067 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
1068 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
1069 };
1070 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
1071}
1072
1073static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
1074 gen_helper_gvec_3 * const fns[4])
1075{
1076 int max;
1077
1078 if (a->esz < 0) {
1079 /* Invalid tsz encoding -- see tszimm_esz. */
1080 return false;
1081 }
1082
1083 /*
1084 * Shift by element size is architecturally valid.
1085 * For arithmetic right-shift, it's the same as by one less.
1086 * For logical shifts and ASRD, it is a zeroing operation.
1087 */
1088 max = 8 << a->esz;
1089 if (a->imm >= max) {
1090 if (asr) {
1091 a->imm = max - 1;
1092 } else {
1093 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
1094 }
1095 }
1096 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
1097}
1098
1099static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
1100 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
1101 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
1102};
1103TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)
1104
1105static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
1106 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
1107 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
1108};
1109TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)
1110
1111static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
1112 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
1113 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
1114};
1115TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)
1116
1117static gen_helper_gvec_3 * const asrd_fns[4] = {
1118 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
1119 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
1120};
1121TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)
1122
1123static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
1124 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
1125 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
1126};
1127TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
1128 a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)
1129
1130static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
1131 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
1132 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
1133};
1134TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
1135 a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)
1136
1137static gen_helper_gvec_3 * const srshr_fns[4] = {
1138 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
1139 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
1140};
1141TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
1142 a->esz < 0 ? NULL : srshr_fns[a->esz], a)
1143
1144static gen_helper_gvec_3 * const urshr_fns[4] = {
1145 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
1146 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
1147};
1148TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
1149 a->esz < 0 ? NULL : urshr_fns[a->esz], a)
1150
1151static gen_helper_gvec_3 * const sqshlu_fns[4] = {
1152 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
1153 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
1154};
1155TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
1156 a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
1157
1158/*
1159 *** SVE Bitwise Shift - Predicated Group
1160 */
1161
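/*
 * Shift by wide elements: the shift amount for each byte/half/word element
 * is taken from the corresponding 64-bit element of Zm, so there is no
 * doubleword form (hence the NULL entry below).
 */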
1162#define DO_ZPZW(NAME, name) \
1163 static gen_helper_gvec_4 * const name##_zpzw_fns[4] = { \
1164 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
1165 gen_helper_sve_##name##_zpzw_s, NULL \
1166 }; \
1167 TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz, \
1168 a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)
1169
1170DO_ZPZW(ASR, asr)
1171DO_ZPZW(LSR, lsr)
1172DO_ZPZW(LSL, lsl)
1173
1174#undef DO_ZPZW
1175
1176/*
1177 *** SVE Bitwise Shift - Unpredicated Group
1178 */
1179
1180static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
1181 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
1182 int64_t, uint32_t, uint32_t))
1183{
1184 if (a->esz < 0) {
1185 /* Invalid tsz encoding -- see tszimm_esz. */
1186 return false;
1187 }
1188 if (sve_access_check(s)) {
1189 unsigned vsz = vec_full_reg_size(s);
1190 /* Shift by element size is architecturally valid. For
1191 arithmetic right-shift, it's the same as by one less.
1192 Otherwise it is a zeroing operation. */
1193 if (a->imm >= 8 << a->esz) {
1194 if (asr) {
1195 a->imm = (8 << a->esz) - 1;
1196 } else {
1197 do_dupi_z(s, a->rd, 0);
1198 return true;
1199 }
1200 }
1201 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
1202 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
1203 }
1204 return true;
1205}
1206
1207TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
1208TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
1209TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)
1210
1211#define DO_ZZW(NAME, name) \
1212 static gen_helper_gvec_3 * const name##_zzw_fns[4] = { \
1213 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
1214 gen_helper_sve_##name##_zzw_s, NULL \
1215 }; \
1216 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, \
1217 name##_zzw_fns[a->esz], a, 0)
1218
1219DO_ZZW(ASR_zzw, asr)
1220DO_ZZW(LSR_zzw, lsr)
1221DO_ZZW(LSL_zzw, lsl)
1222
1223#undef DO_ZZW
1224
1225/*
1226 *** SVE Integer Multiply-Add Group
1227 */
1228
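/* Invoke an out-of-line helper on 4 Zregs (Zd, Za, Zn, Zm) and a predicate. */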
1229static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1230 gen_helper_gvec_5 *fn)
1231{
1232 if (sve_access_check(s)) {
1233 unsigned vsz = vec_full_reg_size(s);
1234 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1235 vec_full_reg_offset(s, a->ra),
1236 vec_full_reg_offset(s, a->rn),
1237 vec_full_reg_offset(s, a->rm),
1238 pred_full_reg_offset(s, a->pg),
1239 vsz, vsz, 0, fn);
1240 }
1241 return true;
1242}
1243
1244static gen_helper_gvec_5 * const mla_fns[4] = {
1245 gen_helper_sve_mla_b, gen_helper_sve_mla_h,
1246 gen_helper_sve_mla_s, gen_helper_sve_mla_d,
1247};
1248TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])
1249
1250static gen_helper_gvec_5 * const mls_fns[4] = {
1251 gen_helper_sve_mls_b, gen_helper_sve_mls_h,
1252 gen_helper_sve_mls_s, gen_helper_sve_mls_d,
1253};
1254TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
1255
1256/*
1257 *** SVE Index Generation Group
1258 */
1259
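/* Expand INDEX: element i of Zd is set to start + i * incr. */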
1260static bool do_index(DisasContext *s, int esz, int rd,
1261 TCGv_i64 start, TCGv_i64 incr)
1262{
1263 unsigned vsz;
1264 TCGv_i32 desc;
1265 TCGv_ptr t_zd;
1266
1267 if (!sve_access_check(s)) {
1268 return true;
1269 }
1270
1271 vsz = vec_full_reg_size(s);
1272 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
1273 t_zd = tcg_temp_new_ptr();
1274
1275 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1276 if (esz == 3) {
1277 gen_helper_sve_index_d(t_zd, start, incr, desc);
1278 } else {
1279 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
1280 static index_fn * const fns[3] = {
1281 gen_helper_sve_index_b,
1282 gen_helper_sve_index_h,
1283 gen_helper_sve_index_s,
1284 };
1285 TCGv_i32 s32 = tcg_temp_new_i32();
1286 TCGv_i32 i32 = tcg_temp_new_i32();
1287
1288 tcg_gen_extrl_i64_i32(s32, start);
1289 tcg_gen_extrl_i64_i32(i32, incr);
1290 fns[esz](t_zd, s32, i32, desc);
1291
1292 tcg_temp_free_i32(s32);
1293 tcg_temp_free_i32(i32);
1294 }
1295 tcg_temp_free_ptr(t_zd);
1296 return true;
1297}
1298
1299TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
1300 tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
1301TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
1302 tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
1303TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
1304 cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
1305TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
1306 cpu_reg(s, a->rn), cpu_reg(s, a->rm))
1307
1308/*
1309 *** SVE Stack Allocation Group
1310 */
1311
1312static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
1313{
1314 if (sve_access_check(s)) {
1315 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1316 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1317 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1318 }
1319 return true;
1320}
1321
1322static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
1323{
1324 if (sve_access_check(s)) {
1325 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1326 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1327 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1328 }
1329 return true;
1330}
1331
1332static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
1333{
1334 if (sve_access_check(s)) {
1335 TCGv_i64 reg = cpu_reg(s, a->rd);
1336 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1337 }
1338 return true;
1339}
1340
1341/*
1342 *** SVE Compute Vector Address Group
1343 */
1344
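/*
 * ADR: each element of Zd is Zn plus an offset derived from Zm shifted
 * left by imm; the four helpers differ in how the Zm offsets are sized
 * and extended.
 */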
1345static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
1346{
1347 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
1348}
1349
1350TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
1351TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
1352TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
1353TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
1354
1355/*
1356 *** SVE Integer Misc - Unpredicated Group
1357 */
1358
1359static gen_helper_gvec_2 * const fexpa_fns[4] = {
1360 NULL, gen_helper_sve_fexpa_h,
1361 gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
1362};
1363TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
1364 fexpa_fns[a->esz], a->rd, a->rn, 0)
1365
1366static gen_helper_gvec_3 * const ftssel_fns[4] = {
1367 NULL, gen_helper_sve_ftssel_h,
1368 gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
1369};
1370TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
1371
1372/*
1373 *** SVE Predicate Logical Operations Group
1374 */
1375
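/*
 * Expand a predicate logical operation. For the flag-setting forms,
 * NZCV is also computed by applying PredTest to the result.
 */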
1376static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1377 const GVecGen4 *gvec_op)
1378{
1379 if (!sve_access_check(s)) {
1380 return true;
1381 }
1382
1383 unsigned psz = pred_gvec_reg_size(s);
1384 int dofs = pred_full_reg_offset(s, a->rd);
1385 int nofs = pred_full_reg_offset(s, a->rn);
1386 int mofs = pred_full_reg_offset(s, a->rm);
1387 int gofs = pred_full_reg_offset(s, a->pg);
1388
1389 if (!a->s) {
1390 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1391 return true;
1392 }
1393
1394 if (psz == 8) {
1395 /* Do the operation and the flags generation in temps. */
1396 TCGv_i64 pd = tcg_temp_new_i64();
1397 TCGv_i64 pn = tcg_temp_new_i64();
1398 TCGv_i64 pm = tcg_temp_new_i64();
1399 TCGv_i64 pg = tcg_temp_new_i64();
1400
1401 tcg_gen_ld_i64(pn, cpu_env, nofs);
1402 tcg_gen_ld_i64(pm, cpu_env, mofs);
1403 tcg_gen_ld_i64(pg, cpu_env, gofs);
1404
1405 gvec_op->fni8(pd, pn, pm, pg);
1406 tcg_gen_st_i64(pd, cpu_env, dofs);
1407
1408 do_predtest1(pd, pg);
1409
1410 tcg_temp_free_i64(pd);
1411 tcg_temp_free_i64(pn);
1412 tcg_temp_free_i64(pm);
1413 tcg_temp_free_i64(pg);
1414 } else {
1415 /* The operation and flags generation is large. The computation
1416 * of the flags depends on the original contents of the guarding
1417 * predicate. If the destination overwrites the guarding predicate,
1418 * then the easiest way to get this right is to save a copy.
1419 */
1420 int tofs = gofs;
1421 if (a->rd == a->pg) {
1422 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1423 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1424 }
1425
1426 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1427 do_predtest(s, dofs, tofs, psz / 8);
1428 }
1429 return true;
1430}
1431
1432static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1433{
1434 tcg_gen_and_i64(pd, pn, pm);
1435 tcg_gen_and_i64(pd, pd, pg);
1436}
1437
1438static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1439 TCGv_vec pm, TCGv_vec pg)
1440{
1441 tcg_gen_and_vec(vece, pd, pn, pm);
1442 tcg_gen_and_vec(vece, pd, pd, pg);
1443}
1444
1445static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1446{
1447 static const GVecGen4 op = {
1448 .fni8 = gen_and_pg_i64,
1449 .fniv = gen_and_pg_vec,
1450 .fno = gen_helper_sve_and_pppp,
1451 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1452 };
1453
1454 if (!a->s) {
1455 if (a->rn == a->rm) {
1456 if (a->pg == a->rn) {
1457 return do_mov_p(s, a->rd, a->rn);
1458 }
1459 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1460 } else if (a->pg == a->rn || a->pg == a->rm) {
1461 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1462 }
1463 }
1464 return do_pppp_flags(s, a, &op);
1465}
1466
1467static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1468{
1469 tcg_gen_andc_i64(pd, pn, pm);
1470 tcg_gen_and_i64(pd, pd, pg);
1471}
1472
1473static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1474 TCGv_vec pm, TCGv_vec pg)
1475{
1476 tcg_gen_andc_vec(vece, pd, pn, pm);
1477 tcg_gen_and_vec(vece, pd, pd, pg);
1478}
1479
1480static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1481{
1482 static const GVecGen4 op = {
1483 .fni8 = gen_bic_pg_i64,
1484 .fniv = gen_bic_pg_vec,
1485 .fno = gen_helper_sve_bic_pppp,
1486 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1487 };
1488
1489 if (!a->s && a->pg == a->rn) {
1490 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1491 }
1492 return do_pppp_flags(s, a, &op);
1493}
1494
1495static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1496{
1497 tcg_gen_xor_i64(pd, pn, pm);
1498 tcg_gen_and_i64(pd, pd, pg);
1499}
1500
1501static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1502 TCGv_vec pm, TCGv_vec pg)
1503{
1504 tcg_gen_xor_vec(vece, pd, pn, pm);
1505 tcg_gen_and_vec(vece, pd, pd, pg);
1506}
1507
1508static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1509{
1510 static const GVecGen4 op = {
1511 .fni8 = gen_eor_pg_i64,
1512 .fniv = gen_eor_pg_vec,
1513 .fno = gen_helper_sve_eor_pppp,
1514 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1515 };
1516
1517 /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
1518 if (!a->s && a->pg == a->rm) {
1519 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
1520 }
1521 return do_pppp_flags(s, a, &op);
1522}
1523
1524static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1525{
1526 if (a->s) {
1527 return false;
1528 }
1529 if (sve_access_check(s)) {
1530 unsigned psz = pred_gvec_reg_size(s);
1531 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1532 pred_full_reg_offset(s, a->pg),
1533 pred_full_reg_offset(s, a->rn),
1534 pred_full_reg_offset(s, a->rm), psz, psz);
1535 }
1536 return true;
1537}
1538
1539static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1540{
1541 tcg_gen_or_i64(pd, pn, pm);
1542 tcg_gen_and_i64(pd, pd, pg);
1543}
1544
1545static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1546 TCGv_vec pm, TCGv_vec pg)
1547{
1548 tcg_gen_or_vec(vece, pd, pn, pm);
1549 tcg_gen_and_vec(vece, pd, pd, pg);
1550}
1551
1552static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1553{
1554 static const GVecGen4 op = {
1555 .fni8 = gen_orr_pg_i64,
1556 .fniv = gen_orr_pg_vec,
1557 .fno = gen_helper_sve_orr_pppp,
1558 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1559 };
1560
1561 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
1562 return do_mov_p(s, a->rd, a->rn);
1563 }
1564 return do_pppp_flags(s, a, &op);
1565}
1566
1567static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1568{
1569 tcg_gen_orc_i64(pd, pn, pm);
1570 tcg_gen_and_i64(pd, pd, pg);
1571}
1572
1573static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1574 TCGv_vec pm, TCGv_vec pg)
1575{
1576 tcg_gen_orc_vec(vece, pd, pn, pm);
1577 tcg_gen_and_vec(vece, pd, pd, pg);
1578}
1579
1580static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1581{
1582 static const GVecGen4 op = {
1583 .fni8 = gen_orn_pg_i64,
1584 .fniv = gen_orn_pg_vec,
1585 .fno = gen_helper_sve_orn_pppp,
1586 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1587 };
1588 return do_pppp_flags(s, a, &op);
1589}
1590
1591static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1592{
1593 tcg_gen_or_i64(pd, pn, pm);
1594 tcg_gen_andc_i64(pd, pg, pd);
1595}
1596
1597static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1598 TCGv_vec pm, TCGv_vec pg)
1599{
1600 tcg_gen_or_vec(vece, pd, pn, pm);
1601 tcg_gen_andc_vec(vece, pd, pg, pd);
1602}
1603
1604static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1605{
1606 static const GVecGen4 op = {
1607 .fni8 = gen_nor_pg_i64,
1608 .fniv = gen_nor_pg_vec,
1609 .fno = gen_helper_sve_nor_pppp,
1610 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1611 };
1612 return do_pppp_flags(s, a, &op);
1613}
1614
1615static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1616{
1617 tcg_gen_and_i64(pd, pn, pm);
1618 tcg_gen_andc_i64(pd, pg, pd);
1619}
1620
1621static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1622 TCGv_vec pm, TCGv_vec pg)
1623{
1624 tcg_gen_and_vec(vece, pd, pn, pm);
1625 tcg_gen_andc_vec(vece, pd, pg, pd);
1626}
1627
1628static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1629{
1630 static const GVecGen4 op = {
1631 .fni8 = gen_nand_pg_i64,
1632 .fniv = gen_nand_pg_vec,
1633 .fno = gen_helper_sve_nand_pppp,
1634 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1635 };
1636 return do_pppp_flags(s, a, &op);
1637}
1638
1639/*
1640 *** SVE Predicate Misc Group
1641 */
1642
1643static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1644{
1645 if (sve_access_check(s)) {
1646 int nofs = pred_full_reg_offset(s, a->rn);
1647 int gofs = pred_full_reg_offset(s, a->pg);
1648 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1649
1650 if (words == 1) {
1651 TCGv_i64 pn = tcg_temp_new_i64();
1652 TCGv_i64 pg = tcg_temp_new_i64();
1653
1654 tcg_gen_ld_i64(pn, cpu_env, nofs);
1655 tcg_gen_ld_i64(pg, cpu_env, gofs);
1656 do_predtest1(pn, pg);
1657
1658 tcg_temp_free_i64(pn);
1659 tcg_temp_free_i64(pg);
1660 } else {
1661 do_predtest(s, nofs, gofs, words);
1662 }
1663 }
1664 return true;
1665}
1666
1667/* See the ARM pseudocode DecodePredCount. */
1668static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1669{
1670 unsigned elements = fullsz >> esz;
1671 unsigned bound;
1672
1673 switch (pattern) {
1674 case 0x0: /* POW2 */
1675 return pow2floor(elements);
1676 case 0x1: /* VL1 */
1677 case 0x2: /* VL2 */
1678 case 0x3: /* VL3 */
1679 case 0x4: /* VL4 */
1680 case 0x5: /* VL5 */
1681 case 0x6: /* VL6 */
1682 case 0x7: /* VL7 */
1683 case 0x8: /* VL8 */
1684 bound = pattern;
1685 break;
1686 case 0x9: /* VL16 */
1687 case 0xa: /* VL32 */
1688 case 0xb: /* VL64 */
1689 case 0xc: /* VL128 */
1690 case 0xd: /* VL256 */
1691 bound = 16 << (pattern - 9);
1692 break;
1693 case 0x1d: /* MUL4 */
1694 return elements - elements % 4;
1695 case 0x1e: /* MUL3 */
1696 return elements - elements % 3;
1697 case 0x1f: /* ALL */
1698 return elements;
1699 default: /* #uimm5 */
1700 return 0;
1701 }
1702 return elements >= bound ? bound : 0;
1703}
1704
1705/* This handles all of the predicate initialization instructions,
1706 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1707 * so that decode_pred_count returns 0. For SETFFR, we will have
1708 * set RD == 16 == FFR.
1709 */
1710static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1711{
1712 if (!sve_access_check(s)) {
1713 return true;
1714 }
1715
1716 unsigned fullsz = vec_full_reg_size(s);
1717 unsigned ofs = pred_full_reg_offset(s, rd);
1718 unsigned numelem, setsz, i;
1719 uint64_t word, lastword;
1720 TCGv_i64 t;
1721
1722 numelem = decode_pred_count(fullsz, pat, esz);
1723
1724 /* Determine what we must store into each bit, and how many. */
1725 if (numelem == 0) {
1726 lastword = word = 0;
1727 setsz = fullsz;
1728 } else {
1729 setsz = numelem << esz;
1730 lastword = word = pred_esz_masks[esz];
1731 if (setsz % 64) {
1732 lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1733 }
1734 }
1735
1736 t = tcg_temp_new_i64();
1737 if (fullsz <= 64) {
1738 tcg_gen_movi_i64(t, lastword);
1739 tcg_gen_st_i64(t, cpu_env, ofs);
1740 goto done;
1741 }
1742
1743 if (word == lastword) {
1744 unsigned maxsz = size_for_gvec(fullsz / 8);
1745 unsigned oprsz = size_for_gvec(setsz / 8);
1746
1747 if (oprsz * 8 == setsz) {
1748 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
1749 goto done;
1750 }
1751 }
1752
1753 setsz /= 8;
1754 fullsz /= 8;
1755
1756 tcg_gen_movi_i64(t, word);
1757 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1758 tcg_gen_st_i64(t, cpu_env, ofs + i);
1759 }
1760 if (lastword != word) {
1761 tcg_gen_movi_i64(t, lastword);
1762 tcg_gen_st_i64(t, cpu_env, ofs + i);
1763 i += 8;
1764 }
1765 if (i < fullsz) {
1766 tcg_gen_movi_i64(t, 0);
1767 for (; i < fullsz; i += 8) {
1768 tcg_gen_st_i64(t, cpu_env, ofs + i);
1769 }
1770 }
1771
1772 done:
1773 tcg_temp_free_i64(t);
1774
1775 /* PTRUES */
1776 if (setflag) {
1777 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1778 tcg_gen_movi_i32(cpu_CF, word == 0);
1779 tcg_gen_movi_i32(cpu_VF, 0);
1780 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1781 }
1782 return true;
1783}
1784
1785TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
1786
1787/* Note pat == 31 is #all, to set all elements. */
1788TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)
1789
1790/* Note pat == 32 is #unimp, to set no elements. */
1791TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
1792
1793static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1794{
1795 /* The path through do_pppp_flags is complicated enough to want to avoid
1796 * duplication. Frob the arguments into the form of a predicated AND.
1797 */
1798 arg_rprr_s alt_a = {
1799 .rd = a->rd, .pg = a->pg, .s = a->s,
1800 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1801 };
3a7be554 1802 return trans_AND_pppp(s, &alt_a);
028e2a7b
RH
1803}
1804
ff502658
RH
1805TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
1806TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
028e2a7b
RH
1807
1808static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1809 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1810 TCGv_ptr, TCGv_i32))
1811{
1812 if (!sve_access_check(s)) {
1813 return true;
1814 }
1815
1816 TCGv_ptr t_pd = tcg_temp_new_ptr();
1817 TCGv_ptr t_pg = tcg_temp_new_ptr();
1818 TCGv_i32 t;
86300b5d 1819 unsigned desc = 0;
028e2a7b 1820
86300b5d
RH
1821 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
1822 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
028e2a7b
RH
1823
1824 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1825 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
392acacc 1826 t = tcg_temp_new_i32();
028e2a7b 1827
392acacc 1828 gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
028e2a7b
RH
1829 tcg_temp_free_ptr(t_pd);
1830 tcg_temp_free_ptr(t_pg);
1831
1832 do_pred_flags(t);
1833 tcg_temp_free_i32(t);
1834 return true;
1835}
1836
d95040e3
RH
1837TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
1838TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
028e2a7b 1839
24e82e68
RH
1840/*
1841 *** SVE Element Count Group
1842 */
1843
1844/* Perform an inline saturating addition of a 32-bit value within
1845 * a 64-bit register. The second operand is known to be positive,
1846 * which halves the comparisons we must perform to bound the result.
1847 */
1848static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1849{
1850 int64_t ibound;
24e82e68
RH
1851
1852 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1853 if (u) {
1854 tcg_gen_ext32u_i64(reg, reg);
1855 } else {
1856 tcg_gen_ext32s_i64(reg, reg);
1857 }
1858 if (d) {
1859 tcg_gen_sub_i64(reg, reg, val);
1860 ibound = (u ? 0 : INT32_MIN);
aa5b0b29 1861 tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
24e82e68
RH
1862 } else {
1863 tcg_gen_add_i64(reg, reg, val);
1864 ibound = (u ? UINT32_MAX : INT32_MAX);
aa5b0b29 1865 tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
24e82e68 1866 }
24e82e68
RH
1867}
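/*
 * A host-side sketch of what the generated code computes for the signed
 * increment/decrement (illustration only, treating the register as a
 * plain int64_t):
 *
 *     int64_t r = (int64_t)(int32_t)reg;        // ext32s
 *     r = d ? r - val : r + val;                // exact in 64 bits
 *     if (d && r < INT32_MIN) r = INT32_MIN;    // smax bound
 *     if (!d && r > INT32_MAX) r = INT32_MAX;   // smin bound
 *
 * The unsigned flavour uses ext32u and the bounds 0 and UINT32_MAX.
 */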
1868
1869/* Similarly with 64-bit values. */
1870static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1871{
1872 TCGv_i64 t0 = tcg_temp_new_i64();
24e82e68
RH
1873 TCGv_i64 t2;
1874
1875 if (u) {
1876 if (d) {
1877 tcg_gen_sub_i64(t0, reg, val);
1878 t2 = tcg_constant_i64(0);
1879 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
24e82e68
RH
1880 } else {
1881 tcg_gen_add_i64(t0, reg, val);
1882 t2 = tcg_constant_i64(-1);
1883 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
24e82e68
RH
1884 }
1885 } else {
35a1ec8e 1886 TCGv_i64 t1 = tcg_temp_new_i64();
24e82e68
RH
1887 if (d) {
1888 /* Detect signed overflow for subtraction. */
1889 tcg_gen_xor_i64(t0, reg, val);
1890 tcg_gen_sub_i64(t1, reg, val);
7a31e0c6 1891 tcg_gen_xor_i64(reg, reg, t1);
1892 tcg_gen_and_i64(t0, t0, reg);
1893
1894 /* Bound the result. */
1895 tcg_gen_movi_i64(reg, INT64_MIN);
35a1ec8e 1896 t2 = tcg_constant_i64(0);
1897 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1898 } else {
1899 /* Detect signed overflow for addition. */
1900 tcg_gen_xor_i64(t0, reg, val);
1901 tcg_gen_add_i64(reg, reg, val);
1902 tcg_gen_xor_i64(t1, reg, val);
1903 tcg_gen_andc_i64(t0, t1, t0);
1904
1905 /* Bound the result. */
1906 tcg_gen_movi_i64(t1, INT64_MAX);
35a1ec8e 1907 t2 = tcg_constant_i64(0);
1908 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1909 }
35a1ec8e 1910 tcg_temp_free_i64(t1);
24e82e68
RH
1911 }
1912 tcg_temp_free_i64(t0);
24e82e68
RH
1913}
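/*
 * The overflow tests above use the usual sign-bit trick (notes only):
 * for reg + val, signed overflow occurred iff the operands have the same
 * sign and the result's sign differs from val's, i.e. bit 63 of
 * (result ^ val) & ~(reg ^ val) is set.  For reg - val it is bit 63 of
 * (reg ^ val) & (reg ^ result).  Because val is known to be positive, an
 * overflowing add can only saturate to INT64_MAX and an overflowing
 * subtract only to INT64_MIN, which is what the movcond selects.
 */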
1914
1915/* Similarly with a vector and a scalar operand. */
1916static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1917 TCGv_i64 val, bool u, bool d)
1918{
1919 unsigned vsz = vec_full_reg_size(s);
1920 TCGv_ptr dptr, nptr;
1921 TCGv_i32 t32, desc;
1922 TCGv_i64 t64;
1923
1924 dptr = tcg_temp_new_ptr();
1925 nptr = tcg_temp_new_ptr();
1926 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1927 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
c6a59b55 1928 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
24e82e68
RH
1929
1930 switch (esz) {
1931 case MO_8:
1932 t32 = tcg_temp_new_i32();
1933 tcg_gen_extrl_i64_i32(t32, val);
1934 if (d) {
1935 tcg_gen_neg_i32(t32, t32);
1936 }
1937 if (u) {
1938 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1939 } else {
1940 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1941 }
1942 tcg_temp_free_i32(t32);
1943 break;
1944
1945 case MO_16:
1946 t32 = tcg_temp_new_i32();
1947 tcg_gen_extrl_i64_i32(t32, val);
1948 if (d) {
1949 tcg_gen_neg_i32(t32, t32);
1950 }
1951 if (u) {
1952 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1953 } else {
1954 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1955 }
1956 tcg_temp_free_i32(t32);
1957 break;
1958
1959 case MO_32:
1960 t64 = tcg_temp_new_i64();
1961 if (d) {
1962 tcg_gen_neg_i64(t64, val);
1963 } else {
1964 tcg_gen_mov_i64(t64, val);
1965 }
1966 if (u) {
1967 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1968 } else {
1969 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1970 }
1971 tcg_temp_free_i64(t64);
1972 break;
1973
1974 case MO_64:
1975 if (u) {
1976 if (d) {
1977 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1978 } else {
1979 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1980 }
1981 } else if (d) {
1982 t64 = tcg_temp_new_i64();
1983 tcg_gen_neg_i64(t64, val);
1984 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1985 tcg_temp_free_i64(t64);
1986 } else {
1987 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1988 }
1989 break;
1990
1991 default:
1992 g_assert_not_reached();
1993 }
1994
1995 tcg_temp_free_ptr(dptr);
1996 tcg_temp_free_ptr(nptr);
24e82e68
RH
1997}
1998
3a7be554 1999static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
2000{
2001 if (sve_access_check(s)) {
2002 unsigned fullsz = vec_full_reg_size(s);
2003 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2004 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2005 }
2006 return true;
2007}
2008
3a7be554 2009static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2010{
2011 if (sve_access_check(s)) {
2012 unsigned fullsz = vec_full_reg_size(s);
2013 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2014 int inc = numelem * a->imm * (a->d ? -1 : 1);
2015 TCGv_i64 reg = cpu_reg(s, a->rd);
2016
2017 tcg_gen_addi_i64(reg, reg, inc);
2018 }
2019 return true;
2020}
2021
3a7be554 2022static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2023{
2024 if (!sve_access_check(s)) {
2025 return true;
2026 }
2027
2028 unsigned fullsz = vec_full_reg_size(s);
2029 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2030 int inc = numelem * a->imm;
2031 TCGv_i64 reg = cpu_reg(s, a->rd);
2032
2033 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
2034 if (inc == 0) {
2035 if (a->u) {
2036 tcg_gen_ext32u_i64(reg, reg);
2037 } else {
2038 tcg_gen_ext32s_i64(reg, reg);
2039 }
2040 } else {
d681f125 2041 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2042 }
2043 return true;
2044}
2045
3a7be554 2046static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2047{
2048 if (!sve_access_check(s)) {
2049 return true;
2050 }
2051
2052 unsigned fullsz = vec_full_reg_size(s);
2053 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2054 int inc = numelem * a->imm;
2055 TCGv_i64 reg = cpu_reg(s, a->rd);
2056
2057 if (inc != 0) {
d681f125 2058 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2059 }
2060 return true;
2061}
2062
3a7be554 2063static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2064{
2065 if (a->esz == 0) {
2066 return false;
2067 }
2068
2069 unsigned fullsz = vec_full_reg_size(s);
2070 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2071 int inc = numelem * a->imm;
2072
2073 if (inc != 0) {
2074 if (sve_access_check(s)) {
24e82e68
RH
2075 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
2076 vec_full_reg_offset(s, a->rn),
2077 tcg_constant_i64(a->d ? -inc : inc),
2078 fullsz, fullsz);
24e82e68
RH
2079 }
2080 } else {
2081 do_mov_z(s, a->rd, a->rn);
2082 }
2083 return true;
2084}
2085
3a7be554 2086static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2087{
2088 if (a->esz == 0) {
2089 return false;
2090 }
2091
2092 unsigned fullsz = vec_full_reg_size(s);
2093 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2094 int inc = numelem * a->imm;
2095
2096 if (inc != 0) {
2097 if (sve_access_check(s)) {
2098 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
2099 tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2100 }
2101 } else {
2102 do_mov_z(s, a->rd, a->rn);
2103 }
2104 return true;
2105}
2106
e1fa1164
RH
2107/*
2108 *** SVE Bitwise Immediate Group
2109 */
2110
2111static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2112{
2113 uint64_t imm;
2114 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2115 extract32(a->dbm, 0, 6),
2116 extract32(a->dbm, 6, 6))) {
2117 return false;
2118 }
faf915e2 2119 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
e1fa1164
RH
2120}
2121
15a314da
RH
2122TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
2123TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
2124TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
e1fa1164 2125
3a7be554 2126static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2127{
2128 uint64_t imm;
2129 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2130 extract32(a->dbm, 0, 6),
2131 extract32(a->dbm, 6, 6))) {
2132 return false;
2133 }
2134 if (sve_access_check(s)) {
2135 do_dupi_z(s, a->rd, imm);
2136 }
2137 return true;
2138}
2139
f25a2361
RH
2140/*
2141 *** SVE Integer Wide Immediate - Predicated Group
2142 */
2143
2144/* Implement all merging copies. This is used for CPY (immediate),
2145 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2146 */
2147static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2148 TCGv_i64 val)
2149{
2150 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2151 static gen_cpy * const fns[4] = {
2152 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2153 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2154 };
2155 unsigned vsz = vec_full_reg_size(s);
c6a59b55 2156 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
f25a2361
RH
2157 TCGv_ptr t_zd = tcg_temp_new_ptr();
2158 TCGv_ptr t_zn = tcg_temp_new_ptr();
2159 TCGv_ptr t_pg = tcg_temp_new_ptr();
2160
2161 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2162 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2163 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2164
2165 fns[esz](t_zd, t_zn, t_pg, val, desc);
2166
2167 tcg_temp_free_ptr(t_zd);
2168 tcg_temp_free_ptr(t_zn);
2169 tcg_temp_free_ptr(t_pg);
f25a2361
RH
2170}
2171
3a7be554 2172static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361
RH
2173{
2174 if (a->esz == 0) {
2175 return false;
2176 }
2177 if (sve_access_check(s)) {
2178 /* Decode the VFP immediate. */
2179 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
e152b48b 2180 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
f25a2361
RH
2181 }
2182 return true;
2183}
2184
3a7be554 2185static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 2186{
f25a2361 2187 if (sve_access_check(s)) {
e152b48b 2188 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
f25a2361
RH
2189 }
2190 return true;
2191}
2192
3a7be554 2193static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
f25a2361
RH
2194{
2195 static gen_helper_gvec_2i * const fns[4] = {
2196 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2197 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2198 };
2199
f25a2361
RH
2200 if (sve_access_check(s)) {
2201 unsigned vsz = vec_full_reg_size(s);
f25a2361
RH
2202 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2203 pred_full_reg_offset(s, a->pg),
2204 tcg_constant_i64(a->imm),
2205 vsz, vsz, 0, fns[a->esz]);
f25a2361
RH
2206 }
2207 return true;
2208}
2209
b94f8f60
RH
2210/*
2211 *** SVE Permute Extract Group
2212 */
2213
75114792 2214static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
b94f8f60
RH
2215{
2216 if (!sve_access_check(s)) {
2217 return true;
2218 }
2219
2220 unsigned vsz = vec_full_reg_size(s);
75114792 2221 unsigned n_ofs = imm >= vsz ? 0 : imm;
b94f8f60 2222 unsigned n_siz = vsz - n_ofs;
2223 unsigned d = vec_full_reg_offset(s, rd);
2224 unsigned n = vec_full_reg_offset(s, rn);
2225 unsigned m = vec_full_reg_offset(s, rm);
b94f8f60
RH
2226
2227 /* Use host vector move insns if we have appropriate sizes
2228 * and no unfortunate overlap.
2229 */
2230 if (m != d
2231 && n_ofs == size_for_gvec(n_ofs)
2232 && n_siz == size_for_gvec(n_siz)
2233 && (d != n || n_siz <= n_ofs)) {
2234 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2235 if (n_ofs != 0) {
2236 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2237 }
2238 } else {
2239 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2240 }
2241 return true;
2242}
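/*
 * Example (illustration only): EXT selects bytes imm..imm+vsz-1 from the
 * byte concatenation Zn:Zm.  With vsz = 32 and imm = 5, the result is
 * bytes 5..31 of Zn followed by bytes 0..4 of Zm, which is exactly what
 * the two gvec moves in the fast path above copy.  An out-of-range imm
 * is treated as 0, so the result is then simply Zn.
 */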
2243
c799c115
RH
2244TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
2245TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
75114792 2246
30562ab7
RH
2247/*
2248 *** SVE Permute - Unpredicated Group
2249 */
2250
3a7be554 2251static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2252{
2253 if (sve_access_check(s)) {
2254 unsigned vsz = vec_full_reg_size(s);
2255 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2256 vsz, vsz, cpu_reg_sp(s, a->rn));
2257 }
2258 return true;
2259}
2260
3a7be554 2261static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
30562ab7
RH
2262{
2263 if ((a->imm & 0x1f) == 0) {
2264 return false;
2265 }
2266 if (sve_access_check(s)) {
2267 unsigned vsz = vec_full_reg_size(s);
2268 unsigned dofs = vec_full_reg_offset(s, a->rd);
2269 unsigned esz, index;
2270
2271 esz = ctz32(a->imm);
2272 index = a->imm >> (esz + 1);
2273
2274 if ((index << esz) < vsz) {
2275 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2276 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2277 } else {
7e17d50e
RH
2278 /*
2279 * While dup_mem handles 128-bit elements, dup_imm does not.
2280 * Thankfully element size doesn't matter for splatting zero.
2281 */
2282 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
30562ab7
RH
2283 }
2284 }
2285 return true;
2286}
2287
2288static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2289{
2290 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2291 static gen_insr * const fns[4] = {
2292 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2293 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2294 };
2295 unsigned vsz = vec_full_reg_size(s);
c6a59b55 2296 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
30562ab7
RH
2297 TCGv_ptr t_zd = tcg_temp_new_ptr();
2298 TCGv_ptr t_zn = tcg_temp_new_ptr();
2299
2300 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2301 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2302
2303 fns[a->esz](t_zd, t_zn, val, desc);
2304
2305 tcg_temp_free_ptr(t_zd);
2306 tcg_temp_free_ptr(t_zn);
30562ab7
RH
2307}
2308
3a7be554 2309static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2310{
2311 if (sve_access_check(s)) {
2312 TCGv_i64 t = tcg_temp_new_i64();
2313 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2314 do_insr_i64(s, a, t);
2315 tcg_temp_free_i64(t);
2316 }
2317 return true;
2318}
2319
3a7be554 2320static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2321{
2322 if (sve_access_check(s)) {
2323 do_insr_i64(s, a, cpu_reg(s, a->rm));
2324 }
2325 return true;
2326}
2327
0ea3cdbf
RH
2328static gen_helper_gvec_2 * const rev_fns[4] = {
2329 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2330 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2331};
2332TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
30562ab7 2333
32e2ad65
RH
2334static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
2335 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2336 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2337};
2338TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
30562ab7 2339
5f425b92
RH
2340static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
2341 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2342 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2343};
2344TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
2345 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
80a712a2 2346
32e2ad65
RH
2347static gen_helper_gvec_3 * const tbx_fns[4] = {
2348 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2349 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2350};
2351TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
80a712a2 2352
3a7be554 2353static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2354{
2355 static gen_helper_gvec_2 * const fns[4][2] = {
2356 { NULL, NULL },
2357 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2358 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2359 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2360 };
2361
2362 if (a->esz == 0) {
2363 return false;
2364 }
2365 if (sve_access_check(s)) {
2366 unsigned vsz = vec_full_reg_size(s);
2367 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2368 vec_full_reg_offset(s, a->rn)
2369 + (a->h ? vsz / 2 : 0),
2370 vsz, vsz, 0, fns[a->esz][a->u]);
2371 }
2372 return true;
2373}
2374
d731d8cb
RH
2375/*
2376 *** SVE Permute - Predicates Group
2377 */
2378
2379static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2380 gen_helper_gvec_3 *fn)
2381{
2382 if (!sve_access_check(s)) {
2383 return true;
2384 }
2385
2386 unsigned vsz = pred_full_reg_size(s);
2387
d731d8cb
RH
2388 TCGv_ptr t_d = tcg_temp_new_ptr();
2389 TCGv_ptr t_n = tcg_temp_new_ptr();
2390 TCGv_ptr t_m = tcg_temp_new_ptr();
f9b0fcce 2391 uint32_t desc = 0;
d731d8cb 2392
f9b0fcce
RH
2393 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2394 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2395 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2396
2397 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2398 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2399 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
d731d8cb 2400
c6a59b55 2401 fn(t_d, t_n, t_m, tcg_constant_i32(desc));
d731d8cb
RH
2402
2403 tcg_temp_free_ptr(t_d);
2404 tcg_temp_free_ptr(t_n);
2405 tcg_temp_free_ptr(t_m);
d731d8cb
RH
2406 return true;
2407}
2408
2409static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2410 gen_helper_gvec_2 *fn)
2411{
2412 if (!sve_access_check(s)) {
2413 return true;
2414 }
2415
2416 unsigned vsz = pred_full_reg_size(s);
2417 TCGv_ptr t_d = tcg_temp_new_ptr();
2418 TCGv_ptr t_n = tcg_temp_new_ptr();
70acaafe 2419 uint32_t desc = 0;
d731d8cb
RH
2420
2421 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2422 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2423
70acaafe
RH
2424 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2425 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2426 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb 2427
c6a59b55 2428 fn(t_d, t_n, tcg_constant_i32(desc));
d731d8cb 2429
d731d8cb
RH
2430 tcg_temp_free_ptr(t_d);
2431 tcg_temp_free_ptr(t_n);
2432 return true;
2433}
2434
bdb349f5
RH
2435TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
2436TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
2437TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
2438TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
2439TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
2440TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
d731d8cb 2441
1d0fce4b
RH
2442TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
2443TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
2444TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
d731d8cb 2445
234b48e9
RH
2446/*
2447 *** SVE Permute - Interleaving Group
2448 */
2449
a95b9618
RH
2450static gen_helper_gvec_3 * const zip_fns[4] = {
2451 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2452 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2453};
2454TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2455 zip_fns[a->esz], a, 0)
2456TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2457 zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
2458
2459TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2460 gen_helper_sve2_zip_q, a, 0)
2461TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2462 gen_helper_sve2_zip_q, a,
2463 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
74b64b25 2464
234b48e9
RH
2465static gen_helper_gvec_3 * const uzp_fns[4] = {
2466 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2467 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2468};
2469
32e2ad65
RH
2470TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2471 uzp_fns[a->esz], a, 0)
2472TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2473 uzp_fns[a->esz], a, 1 << a->esz)
234b48e9 2474
32e2ad65
RH
2475TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2476 gen_helper_sve2_uzp_q, a, 0)
2477TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2478 gen_helper_sve2_uzp_q, a, 16)
74b64b25 2479
234b48e9
RH
2480static gen_helper_gvec_3 * const trn_fns[4] = {
2481 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2482 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2483};
2484
32e2ad65
RH
2485TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2486 trn_fns[a->esz], a, 0)
2487TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2488 trn_fns[a->esz], a, 1 << a->esz)
234b48e9 2489
32e2ad65
RH
2490TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2491 gen_helper_sve2_trn_q, a, 0)
2492TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2493 gen_helper_sve2_trn_q, a, 16)
74b64b25 2494
3ca879ae
RH
2495/*
2496 *** SVE Permute Vector - Predicated Group
2497 */
2498
817bd5c9
RH
2499static gen_helper_gvec_3 * const compact_fns[4] = {
2500 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2501};
2502TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
3ca879ae 2503
ef23cb72
RH
2504/* Call the helper that computes the ARM LastActiveElement pseudocode
2505 * function, scaled by the element size. This includes the not found
2506 * indication; e.g. not found for esz=3 is -8.
2507 */
2508static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2509{
2510 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2511 * round up, as we do elsewhere, because we need the exact size.
2512 */
2513 TCGv_ptr t_p = tcg_temp_new_ptr();
2acbfbe4 2514 unsigned desc = 0;
ef23cb72 2515
2acbfbe4
RH
2516 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2517 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
ef23cb72
RH
2518
2519 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
ef23cb72 2520
c6a59b55 2521 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
ef23cb72 2522
ef23cb72
RH
2523 tcg_temp_free_ptr(t_p);
2524}
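/*
 * Example (illustration only): the value produced is already scaled to a
 * byte offset, so with esz = MO_32 and the last active element at index
 * 5 the helper returns 20, while an all-false predicate returns -4
 * (and -8 for esz = MO_64, as noted above).
 */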
2525
2526/* Increment LAST to the offset of the next element in the vector,
2527 * wrapping around to 0.
2528 */
2529static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2530{
2531 unsigned vsz = vec_full_reg_size(s);
2532
2533 tcg_gen_addi_i32(last, last, 1 << esz);
2534 if (is_power_of_2(vsz)) {
2535 tcg_gen_andi_i32(last, last, vsz - 1);
2536 } else {
4b308bd5
RH
2537 TCGv_i32 max = tcg_constant_i32(vsz);
2538 TCGv_i32 zero = tcg_constant_i32(0);
ef23cb72 2539 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
ef23cb72
RH
2540 }
2541}
2542
2543/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2544static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2545{
2546 unsigned vsz = vec_full_reg_size(s);
2547
2548 if (is_power_of_2(vsz)) {
2549 tcg_gen_andi_i32(last, last, vsz - 1);
2550 } else {
4b308bd5
RH
2551 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
2552 TCGv_i32 zero = tcg_constant_i32(0);
ef23cb72 2553 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
ef23cb72
RH
2554 }
2555}
2556
2557/* Load an unsigned element of ESZ from BASE+OFS. */
2558static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2559{
2560 TCGv_i64 r = tcg_temp_new_i64();
2561
2562 switch (esz) {
2563 case 0:
2564 tcg_gen_ld8u_i64(r, base, ofs);
2565 break;
2566 case 1:
2567 tcg_gen_ld16u_i64(r, base, ofs);
2568 break;
2569 case 2:
2570 tcg_gen_ld32u_i64(r, base, ofs);
2571 break;
2572 case 3:
2573 tcg_gen_ld_i64(r, base, ofs);
2574 break;
2575 default:
2576 g_assert_not_reached();
2577 }
2578 return r;
2579}
2580
2581/* Load an unsigned element of ESZ from RM[LAST]. */
2582static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2583 int rm, int esz)
2584{
2585 TCGv_ptr p = tcg_temp_new_ptr();
2586 TCGv_i64 r;
2587
2588 /* Convert offset into vector into offset into ENV.
2589 * The final adjustment for the vector register base
2590 * is added via constant offset to the load.
2591 */
e03b5686 2592#if HOST_BIG_ENDIAN
ef23cb72
RH
2593 /* Adjust for element ordering. See vec_reg_offset. */
2594 if (esz < 3) {
2595 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2596 }
2597#endif
2598 tcg_gen_ext_i32_ptr(p, last);
2599 tcg_gen_add_ptr(p, p, cpu_env);
2600
2601 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2602 tcg_temp_free_ptr(p);
2603
2604 return r;
2605}
2606
2607/* Compute CLAST for a Zreg. */
2608static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2609{
2610 TCGv_i32 last;
2611 TCGLabel *over;
2612 TCGv_i64 ele;
2613 unsigned vsz, esz = a->esz;
2614
2615 if (!sve_access_check(s)) {
2616 return true;
2617 }
2618
2619 last = tcg_temp_local_new_i32();
2620 over = gen_new_label();
2621
2622 find_last_active(s, last, esz, a->pg);
2623
2624 /* There is of course no movcond for a 2048-bit vector,
2625 * so we must branch over the actual store.
2626 */
2627 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2628
2629 if (!before) {
2630 incr_last_active(s, last, esz);
2631 }
2632
2633 ele = load_last_active(s, last, a->rm, esz);
2634 tcg_temp_free_i32(last);
2635
2636 vsz = vec_full_reg_size(s);
2637 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2638 tcg_temp_free_i64(ele);
2639
2640 /* If this insn used MOVPRFX, we may need a second move. */
2641 if (a->rd != a->rn) {
2642 TCGLabel *done = gen_new_label();
2643 tcg_gen_br(done);
2644
2645 gen_set_label(over);
2646 do_mov_z(s, a->rd, a->rn);
2647
2648 gen_set_label(done);
2649 } else {
2650 gen_set_label(over);
2651 }
2652 return true;
2653}
2654
db7fa5d8
RH
2655TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
2656TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
ef23cb72
RH
2657
2658/* Compute CLAST for a scalar. */
2659static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2660 bool before, TCGv_i64 reg_val)
2661{
2662 TCGv_i32 last = tcg_temp_new_i32();
053552d3 2663 TCGv_i64 ele, cmp;
ef23cb72
RH
2664
2665 find_last_active(s, last, esz, pg);
2666
2667 /* Extend the original value of last prior to incrementing. */
2668 cmp = tcg_temp_new_i64();
2669 tcg_gen_ext_i32_i64(cmp, last);
2670
2671 if (!before) {
2672 incr_last_active(s, last, esz);
2673 }
2674
2675 /* The conceit here is that while last < 0 indicates not found, after
2676 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2677 * from which we can load garbage. We then discard the garbage with
2678 * a conditional move.
2679 */
2680 ele = load_last_active(s, last, rm, esz);
2681 tcg_temp_free_i32(last);
2682
053552d3
RH
2683 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2684 ele, reg_val);
ef23cb72 2685
ef23cb72
RH
2686 tcg_temp_free_i64(cmp);
2687 tcg_temp_free_i64(ele);
2688}
2689
2690/* Compute CLAST for a Vreg. */
2691static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2692{
2693 if (sve_access_check(s)) {
2694 int esz = a->esz;
2695 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2696 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2697
2698 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2699 write_fp_dreg(s, a->rd, reg);
2700 tcg_temp_free_i64(reg);
2701 }
2702 return true;
2703}
2704
ac4fb247
RH
2705TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
2706TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
ef23cb72
RH
2707
2708/* Compute CLAST for a Xreg. */
2709static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2710{
2711 TCGv_i64 reg;
2712
2713 if (!sve_access_check(s)) {
2714 return true;
2715 }
2716
2717 reg = cpu_reg(s, a->rd);
2718 switch (a->esz) {
2719 case 0:
2720 tcg_gen_ext8u_i64(reg, reg);
2721 break;
2722 case 1:
2723 tcg_gen_ext16u_i64(reg, reg);
2724 break;
2725 case 2:
2726 tcg_gen_ext32u_i64(reg, reg);
2727 break;
2728 case 3:
2729 break;
2730 default:
2731 g_assert_not_reached();
2732 }
2733
2734 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2735 return true;
2736}
2737
c673404a
RH
2738TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2739TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
ef23cb72
RH
2740
2741/* Compute LAST for a scalar. */
2742static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2743 int pg, int rm, bool before)
2744{
2745 TCGv_i32 last = tcg_temp_new_i32();
2746 TCGv_i64 ret;
2747
2748 find_last_active(s, last, esz, pg);
2749 if (before) {
2750 wrap_last_active(s, last, esz);
2751 } else {
2752 incr_last_active(s, last, esz);
2753 }
2754
2755 ret = load_last_active(s, last, rm, esz);
2756 tcg_temp_free_i32(last);
2757 return ret;
2758}
2759
2760/* Compute LAST for a Vreg. */
2761static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2762{
2763 if (sve_access_check(s)) {
2764 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2765 write_fp_dreg(s, a->rd, val);
2766 tcg_temp_free_i64(val);
2767 }
2768 return true;
2769}
2770
75de9fd4
RH
2771TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
2772TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
ef23cb72
RH
2773
2774/* Compute LAST for a Xreg. */
2775static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2776{
2777 if (sve_access_check(s)) {
2778 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2779 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2780 tcg_temp_free_i64(val);
2781 }
2782 return true;
2783}
2784
884c5a80
RH
2785TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
2786TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
ef23cb72 2787
3a7be554 2788static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2789{
2790 if (sve_access_check(s)) {
2791 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2792 }
2793 return true;
2794}
2795
3a7be554 2796static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2797{
2798 if (sve_access_check(s)) {
2799 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2800 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2801 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2802 tcg_temp_free_i64(t);
2803 }
2804 return true;
2805}
2806
817bd5c9
RH
2807static gen_helper_gvec_3 * const revb_fns[4] = {
2808 NULL, gen_helper_sve_revb_h,
2809 gen_helper_sve_revb_s, gen_helper_sve_revb_d,
2810};
2811TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
dae8fb90 2812
817bd5c9
RH
2813static gen_helper_gvec_3 * const revh_fns[4] = {
2814 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
2815};
2816TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
dae8fb90 2817
817bd5c9
RH
2818TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
2819 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
dae8fb90 2820
897ebd70
RH
2821TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
2822 gen_helper_sve_splice, a, a->esz)
b48ff240 2823
897ebd70
RH
2824TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
2825 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
75114792 2826
757f9cff
RH
2827/*
2828 *** SVE Integer Compare - Vectors Group
2829 */
2830
2831static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2832 gen_helper_gvec_flags_4 *gen_fn)
2833{
2834 TCGv_ptr pd, zn, zm, pg;
2835 unsigned vsz;
2836 TCGv_i32 t;
2837
2838 if (gen_fn == NULL) {
2839 return false;
2840 }
2841 if (!sve_access_check(s)) {
2842 return true;
2843 }
2844
2845 vsz = vec_full_reg_size(s);
392acacc 2846 t = tcg_temp_new_i32();
757f9cff
RH
2847 pd = tcg_temp_new_ptr();
2848 zn = tcg_temp_new_ptr();
2849 zm = tcg_temp_new_ptr();
2850 pg = tcg_temp_new_ptr();
2851
2852 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2853 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2854 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2855 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2856
392acacc 2857 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
757f9cff
RH
2858
2859 tcg_temp_free_ptr(pd);
2860 tcg_temp_free_ptr(zn);
2861 tcg_temp_free_ptr(zm);
2862 tcg_temp_free_ptr(pg);
2863
2864 do_pred_flags(t);
2865
2866 tcg_temp_free_i32(t);
2867 return true;
2868}
2869
2870#define DO_PPZZ(NAME, name) \
2871 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \
2872 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2873 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2874 }; \
2875 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \
2876 a, name##_ppzz_fns[a->esz])
757f9cff
RH
2877
2878DO_PPZZ(CMPEQ, cmpeq)
2879DO_PPZZ(CMPNE, cmpne)
2880DO_PPZZ(CMPGT, cmpgt)
2881DO_PPZZ(CMPGE, cmpge)
2882DO_PPZZ(CMPHI, cmphi)
2883DO_PPZZ(CMPHS, cmphs)
2884
2885#undef DO_PPZZ
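/*
 * For reference, DO_PPZZ(CMPEQ, cmpeq) above expands to (illustration of
 * the macro, not additional code):
 *
 *     static gen_helper_gvec_flags_4 * const cmpeq_ppzz_fns[4] = {
 *         gen_helper_sve_cmpeq_ppzz_b, gen_helper_sve_cmpeq_ppzz_h,
 *         gen_helper_sve_cmpeq_ppzz_s, gen_helper_sve_cmpeq_ppzz_d,
 *     };
 *     TRANS_FEAT(CMPEQ_ppzz, aa64_sve, do_ppzz_flags,
 *                a, cmpeq_ppzz_fns[a->esz])
 */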
2886
2887#define DO_PPZW(NAME, name) \
2888 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \
2889 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2890 gen_helper_sve_##name##_ppzw_s, NULL \
2891 }; \
2892 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \
2893 a, name##_ppzw_fns[a->esz])
757f9cff
RH
2894
2895DO_PPZW(CMPEQ, cmpeq)
2896DO_PPZW(CMPNE, cmpne)
2897DO_PPZW(CMPGT, cmpgt)
2898DO_PPZW(CMPGE, cmpge)
2899DO_PPZW(CMPHI, cmphi)
2900DO_PPZW(CMPHS, cmphs)
2901DO_PPZW(CMPLT, cmplt)
2902DO_PPZW(CMPLE, cmple)
2903DO_PPZW(CMPLO, cmplo)
2904DO_PPZW(CMPLS, cmpls)
2905
2906#undef DO_PPZW
2907
38cadeba
RH
2908/*
2909 *** SVE Integer Compare - Immediate Groups
2910 */
2911
2912static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2913 gen_helper_gvec_flags_3 *gen_fn)
2914{
2915 TCGv_ptr pd, zn, pg;
2916 unsigned vsz;
2917 TCGv_i32 t;
2918
2919 if (gen_fn == NULL) {
2920 return false;
2921 }
2922 if (!sve_access_check(s)) {
2923 return true;
2924 }
2925
2926 vsz = vec_full_reg_size(s);
392acacc 2927 t = tcg_temp_new_i32();
38cadeba
RH
2928 pd = tcg_temp_new_ptr();
2929 zn = tcg_temp_new_ptr();
2930 pg = tcg_temp_new_ptr();
2931
2932 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2933 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2934 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2935
392acacc 2936 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
38cadeba
RH
2937
2938 tcg_temp_free_ptr(pd);
2939 tcg_temp_free_ptr(zn);
2940 tcg_temp_free_ptr(pg);
2941
2942 do_pred_flags(t);
2943
2944 tcg_temp_free_i32(t);
2945 return true;
2946}
2947
2948#define DO_PPZI(NAME, name) \
9c545be6 2949 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \
38cadeba
RH
2950 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2951 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2952 }; \
9c545be6
RH
2953 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \
2954 name##_ppzi_fns[a->esz])
38cadeba
RH
2955
2956DO_PPZI(CMPEQ, cmpeq)
2957DO_PPZI(CMPNE, cmpne)
2958DO_PPZI(CMPGT, cmpgt)
2959DO_PPZI(CMPGE, cmpge)
2960DO_PPZI(CMPHI, cmphi)
2961DO_PPZI(CMPHS, cmphs)
2962DO_PPZI(CMPLT, cmplt)
2963DO_PPZI(CMPLE, cmple)
2964DO_PPZI(CMPLO, cmplo)
2965DO_PPZI(CMPLS, cmpls)
2966
2967#undef DO_PPZI
2968
35da316f
RH
2969/*
2970 *** SVE Partition Break Group
2971 */
2972
2973static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2974 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2975{
2976 if (!sve_access_check(s)) {
2977 return true;
2978 }
2979
2980 unsigned vsz = pred_full_reg_size(s);
2981
2982 /* Predicate sizes may be smaller and cannot use simd_desc. */
2983 TCGv_ptr d = tcg_temp_new_ptr();
2984 TCGv_ptr n = tcg_temp_new_ptr();
2985 TCGv_ptr m = tcg_temp_new_ptr();
2986 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 2987 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
2988
2989 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2990 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2991 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2992 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2993
2994 if (a->s) {
2995 TCGv_i32 t = tcg_temp_new_i32();
2996 fn_s(t, d, n, m, g, desc);
35da316f 2997 do_pred_flags(t);
93418f1c 2998 tcg_temp_free_i32(t);
35da316f 2999 } else {
93418f1c 3000 fn(d, n, m, g, desc);
35da316f
RH
3001 }
3002 tcg_temp_free_ptr(d);
3003 tcg_temp_free_ptr(n);
3004 tcg_temp_free_ptr(m);
3005 tcg_temp_free_ptr(g);
35da316f
RH
3006 return true;
3007}
3008
3009static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3010 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3011{
3012 if (!sve_access_check(s)) {
3013 return true;
3014 }
3015
3016 unsigned vsz = pred_full_reg_size(s);
3017
3018 /* Predicate sizes may be smaller and cannot use simd_desc. */
3019 TCGv_ptr d = tcg_temp_new_ptr();
3020 TCGv_ptr n = tcg_temp_new_ptr();
3021 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 3022 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3023
3024 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3025 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3026 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3027
3028 if (a->s) {
3029 TCGv_i32 t = tcg_temp_new_i32();
3030 fn_s(t, d, n, g, desc);
35da316f 3031 do_pred_flags(t);
93418f1c 3032 tcg_temp_free_i32(t);
35da316f 3033 } else {
93418f1c 3034 fn(d, n, g, desc);
35da316f
RH
3035 }
3036 tcg_temp_free_ptr(d);
3037 tcg_temp_free_ptr(n);
3038 tcg_temp_free_ptr(g);
35da316f
RH
3039 return true;
3040}
3041
2224d24d
RH
3042TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
3043 gen_helper_sve_brkpa, gen_helper_sve_brkpas)
3044TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
3045 gen_helper_sve_brkpb, gen_helper_sve_brkpbs)
3046
3047TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
3048 gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
3049TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
3050 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)
3051
3052TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
3053 gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
3054TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
3055 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)
3056
3057TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
3058 gen_helper_sve_brkn, gen_helper_sve_brkns)
35da316f 3059
9ee3a611
RH
3060/*
3061 *** SVE Predicate Count Group
3062 */
3063
3064static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3065{
3066 unsigned psz = pred_full_reg_size(s);
3067
3068 if (psz <= 8) {
3069 uint64_t psz_mask;
3070
3071 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3072 if (pn != pg) {
3073 TCGv_i64 g = tcg_temp_new_i64();
3074 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3075 tcg_gen_and_i64(val, val, g);
3076 tcg_temp_free_i64(g);
3077 }
3078
3079 /* Reduce the pred_esz_masks value simply to reduce the
3080 * size of the code generated here.
3081 */
3082 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3083 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3084
3085 tcg_gen_ctpop_i64(val, val);
3086 } else {
3087 TCGv_ptr t_pn = tcg_temp_new_ptr();
3088 TCGv_ptr t_pg = tcg_temp_new_ptr();
f556a201 3089 unsigned desc = 0;
9ee3a611 3090
f556a201
RH
3091 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
3092 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
9ee3a611
RH
3093
3094 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3095 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
9ee3a611 3096
c6a59b55 3097 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
9ee3a611
RH
3098 tcg_temp_free_ptr(t_pn);
3099 tcg_temp_free_ptr(t_pg);
9ee3a611
RH
3100 }
3101}
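/*
 * Example (illustration only): with a 256-bit vector the predicate is 4
 * bytes, so the ctpop fast path above applies.  For esz = MO_32 the mask
 * pred_esz_masks[2] & MAKE_64BIT_MASK(0, 32) = 0x11111111 keeps only the
 * governing bit of each 32-bit element; if bits 0, 4 and 12 of PN & PG
 * are set, ctpop returns 3 active elements.
 */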
3102
3a7be554 3103static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
3104{
3105 if (sve_access_check(s)) {
3106 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3107 }
3108 return true;
3109}
3110
3a7be554 3111static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3112{
3113 if (sve_access_check(s)) {
3114 TCGv_i64 reg = cpu_reg(s, a->rd);
3115 TCGv_i64 val = tcg_temp_new_i64();
3116
3117 do_cntp(s, val, a->esz, a->pg, a->pg);
3118 if (a->d) {
3119 tcg_gen_sub_i64(reg, reg, val);
3120 } else {
3121 tcg_gen_add_i64(reg, reg, val);
3122 }
3123 tcg_temp_free_i64(val);
3124 }
3125 return true;
3126}
3127
3a7be554 3128static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3129{
3130 if (a->esz == 0) {
3131 return false;
3132 }
3133 if (sve_access_check(s)) {
3134 unsigned vsz = vec_full_reg_size(s);
3135 TCGv_i64 val = tcg_temp_new_i64();
3136 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3137
3138 do_cntp(s, val, a->esz, a->pg, a->pg);
3139 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3140 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3141 }
3142 return true;
3143}
3144
3a7be554 3145static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3146{
3147 if (sve_access_check(s)) {
3148 TCGv_i64 reg = cpu_reg(s, a->rd);
3149 TCGv_i64 val = tcg_temp_new_i64();
3150
3151 do_cntp(s, val, a->esz, a->pg, a->pg);
3152 do_sat_addsub_32(reg, val, a->u, a->d);
3153 }
3154 return true;
3155}
3156
3a7be554 3157static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3158{
3159 if (sve_access_check(s)) {
3160 TCGv_i64 reg = cpu_reg(s, a->rd);
3161 TCGv_i64 val = tcg_temp_new_i64();
3162
3163 do_cntp(s, val, a->esz, a->pg, a->pg);
3164 do_sat_addsub_64(reg, val, a->u, a->d);
3165 }
3166 return true;
3167}
3168
3a7be554 3169static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3170{
3171 if (a->esz == 0) {
3172 return false;
3173 }
3174 if (sve_access_check(s)) {
3175 TCGv_i64 val = tcg_temp_new_i64();
3176 do_cntp(s, val, a->esz, a->pg, a->pg);
3177 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3178 }
3179 return true;
3180}
3181
caf1cefc
RH
3182/*
3183 *** SVE Integer Compare Scalars Group
3184 */
3185
3a7be554 3186static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3187{
3188 if (!sve_access_check(s)) {
3189 return true;
3190 }
3191
3192 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3193 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3194 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3195 TCGv_i64 cmp = tcg_temp_new_i64();
3196
3197 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3198 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3199 tcg_temp_free_i64(cmp);
3200
3201 /* VF = !NF & !CF. */
3202 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3203 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3204
3205 /* Both NF and VF actually look at bit 31. */
3206 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3207 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3208 return true;
3209}
3210
3a7be554 3211static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
caf1cefc 3212{
bbd0968c 3213 TCGv_i64 op0, op1, t0, t1, tmax;
4481bbf2 3214 TCGv_i32 t2;
caf1cefc 3215 TCGv_ptr ptr;
e610906c
RH
3216 unsigned vsz = vec_full_reg_size(s);
3217 unsigned desc = 0;
caf1cefc 3218 TCGCond cond;
34688dbc
RH
3219 uint64_t maxval;
3220 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
3221 bool eq = a->eq == a->lt;
caf1cefc 3222
34688dbc
RH
3223 /* The greater-than conditions are all SVE2. */
3224 if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
3225 return false;
3226 }
bbd0968c
RH
3227 if (!sve_access_check(s)) {
3228 return true;
3229 }
3230
3231 op0 = read_cpu_reg(s, a->rn, 1);
3232 op1 = read_cpu_reg(s, a->rm, 1);
3233
caf1cefc
RH
3234 if (!a->sf) {
3235 if (a->u) {
3236 tcg_gen_ext32u_i64(op0, op0);
3237 tcg_gen_ext32u_i64(op1, op1);
3238 } else {
3239 tcg_gen_ext32s_i64(op0, op0);
3240 tcg_gen_ext32s_i64(op1, op1);
3241 }
3242 }
3243
3244 /* For the helper, compress the different conditions into a computation
3245 * of how many iterations for which the condition is true.
caf1cefc 3246 */
bbd0968c
RH
3247 t0 = tcg_temp_new_i64();
3248 t1 = tcg_temp_new_i64();
34688dbc
RH
3249
3250 if (a->lt) {
3251 tcg_gen_sub_i64(t0, op1, op0);
3252 if (a->u) {
3253 maxval = a->sf ? UINT64_MAX : UINT32_MAX;
3254 cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
3255 } else {
3256 maxval = a->sf ? INT64_MAX : INT32_MAX;
3257 cond = eq ? TCG_COND_LE : TCG_COND_LT;
3258 }
3259 } else {
3260 tcg_gen_sub_i64(t0, op0, op1);
3261 if (a->u) {
3262 maxval = 0;
3263 cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
3264 } else {
3265 maxval = a->sf ? INT64_MIN : INT32_MIN;
3266 cond = eq ? TCG_COND_GE : TCG_COND_GT;
3267 }
3268 }
caf1cefc 3269
4481bbf2 3270 tmax = tcg_constant_i64(vsz >> a->esz);
34688dbc 3271 if (eq) {
caf1cefc
RH
3272 /* Equality means one more iteration. */
3273 tcg_gen_addi_i64(t0, t0, 1);
bbd0968c 3274
34688dbc
RH
3275 /*
3276 * For the less-than while, if op1 is maxval (and the only time
3277 * the addition above could overflow), then we produce an all-true
3278 * predicate by setting the count to the vector length. This is
3279 * because the pseudocode is described as an increment + compare
3280 * loop, and the maximum integer would always compare true.
3281 * Similarly, the greater-than while has the same issue with the
3282 * minimum integer due to the decrement + compare loop.
bbd0968c 3283 */
34688dbc 3284 tcg_gen_movi_i64(t1, maxval);
bbd0968c 3285 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
caf1cefc
RH
3286 }
3287
bbd0968c
RH
3288 /* Bound to the maximum. */
3289 tcg_gen_umin_i64(t0, t0, tmax);
bbd0968c
RH
3290
3291 /* Set the count to zero if the condition is false. */
caf1cefc
RH
3292 tcg_gen_movi_i64(t1, 0);
3293 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
bbd0968c 3294 tcg_temp_free_i64(t1);
caf1cefc 3295
bbd0968c 3296 /* Since we're bounded, pass as a 32-bit type. */
caf1cefc
RH
3297 t2 = tcg_temp_new_i32();
3298 tcg_gen_extrl_i64_i32(t2, t0);
3299 tcg_temp_free_i64(t0);
bbd0968c
RH
3300
3301 /* Scale elements to bits. */
3302 tcg_gen_shli_i32(t2, t2, a->esz);
caf1cefc 3303
e610906c
RH
3304 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3305 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
caf1cefc
RH
3306
3307 ptr = tcg_temp_new_ptr();
3308 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3309
34688dbc 3310 if (a->lt) {
4481bbf2 3311 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
34688dbc 3312 } else {
4481bbf2 3313 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
34688dbc 3314 }
caf1cefc
RH
3315 do_pred_flags(t2);
3316
3317 tcg_temp_free_ptr(ptr);
3318 tcg_temp_free_i32(t2);
caf1cefc
RH
3319 return true;
3320}
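/*
 * Example (illustration only): WHILELO with Xn = 3, Xm = 7 and 32-bit
 * elements on a 256-bit vector computes t0 = 7 - 3 = 4 iterations,
 * bounded by tmax = 8 elements; since 3 < 7 the count survives the final
 * movcond, is scaled to 16 predicate bits, and the helper makes the
 * first four elements of Pd active.  If Xn >= Xm the count is forced to
 * zero and an all-false predicate is written.
 */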
3321
14f6dad1
RH
3322static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3323{
3324 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3325 TCGv_i32 t2;
14f6dad1
RH
3326 TCGv_ptr ptr;
3327 unsigned vsz = vec_full_reg_size(s);
3328 unsigned desc = 0;
3329
3330 if (!dc_isar_feature(aa64_sve2, s)) {
3331 return false;
3332 }
3333 if (!sve_access_check(s)) {
3334 return true;
3335 }
3336
3337 op0 = read_cpu_reg(s, a->rn, 1);
3338 op1 = read_cpu_reg(s, a->rm, 1);
3339
4481bbf2 3340 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3341 diff = tcg_temp_new_i64();
3342
3343 if (a->rw) {
3344 /* WHILERW */
3345 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3346 t1 = tcg_temp_new_i64();
3347 tcg_gen_sub_i64(diff, op0, op1);
3348 tcg_gen_sub_i64(t1, op1, op0);
3349 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3350 tcg_temp_free_i64(t1);
3351 /* Round down to a multiple of ESIZE. */
3352 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3353 /* If op1 == op0, diff == 0, and the condition is always true. */
3354 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3355 } else {
3356 /* WHILEWR */
3357 tcg_gen_sub_i64(diff, op1, op0);
3358 /* Round down to a multiple of ESIZE. */
3359 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3360 /* If op0 >= op1, diff <= 0, the condition is always true. */
3361 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3362 }
3363
3364 /* Bound to the maximum. */
3365 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3366
3367 /* Since we're bounded, pass as a 32-bit type. */
3368 t2 = tcg_temp_new_i32();
3369 tcg_gen_extrl_i64_i32(t2, diff);
3370 tcg_temp_free_i64(diff);
3371
3372 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3373 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3374
3375 ptr = tcg_temp_new_ptr();
3376 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3377
4481bbf2 3378 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3379 do_pred_flags(t2);
3380
3381 tcg_temp_free_ptr(ptr);
3382 tcg_temp_free_i32(t2);
14f6dad1
RH
3383 return true;
3384}
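/*
 * Example (illustration only): WHILEWR with Xn = 0x1000, Xm = 0x1009 and
 * 32-bit elements computes diff = 9 rounded down to 8 bytes, so the
 * first two elements of Pd become active; if Xn >= Xm the movcond forces
 * diff to the full vector size and an all-true predicate results.
 * WHILERW does the same with the absolute difference of the two
 * addresses, treating equal addresses as no hazard at all.
 */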
3385
ed491961
RH
3386/*
3387 *** SVE Integer Wide Immediate - Unpredicated Group
3388 */
3389
3a7be554 3390static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3391{
3392 if (a->esz == 0) {
3393 return false;
3394 }
3395 if (sve_access_check(s)) {
3396 unsigned vsz = vec_full_reg_size(s);
3397 int dofs = vec_full_reg_offset(s, a->rd);
3398 uint64_t imm;
3399
3400 /* Decode the VFP immediate. */
3401 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3402 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3403 }
3404 return true;
3405}
3406
3a7be554 3407static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3408{
ed491961
RH
3409 if (sve_access_check(s)) {
3410 unsigned vsz = vec_full_reg_size(s);
3411 int dofs = vec_full_reg_offset(s, a->rd);
8711e71f 3412 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3413 }
3414 return true;
3415}
3416
48ca613d 3417TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)
6e6a157d 3418
3a7be554 3419static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3420{
3421 a->imm = -a->imm;
3a7be554 3422 return trans_ADD_zzi(s, a);
6e6a157d
RH
3423}
3424
3a7be554 3425static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3426{
53229a77 3427 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
6e6a157d
RH
3428 static const GVecGen2s op[4] = {
3429 { .fni8 = tcg_gen_vec_sub8_i64,
3430 .fniv = tcg_gen_sub_vec,
3431 .fno = gen_helper_sve_subri_b,
53229a77 3432 .opt_opc = vecop_list,
6e6a157d
RH
3433 .vece = MO_8,
3434 .scalar_first = true },
3435 { .fni8 = tcg_gen_vec_sub16_i64,
3436 .fniv = tcg_gen_sub_vec,
3437 .fno = gen_helper_sve_subri_h,
53229a77 3438 .opt_opc = vecop_list,
6e6a157d
RH
3439 .vece = MO_16,
3440 .scalar_first = true },
3441 { .fni4 = tcg_gen_sub_i32,
3442 .fniv = tcg_gen_sub_vec,
3443 .fno = gen_helper_sve_subri_s,
53229a77 3444 .opt_opc = vecop_list,
6e6a157d
RH
3445 .vece = MO_32,
3446 .scalar_first = true },
3447 { .fni8 = tcg_gen_sub_i64,
3448 .fniv = tcg_gen_sub_vec,
3449 .fno = gen_helper_sve_subri_d,
53229a77 3450 .opt_opc = vecop_list,
6e6a157d
RH
3451 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3452 .vece = MO_64,
3453 .scalar_first = true }
3454 };
3455
6e6a157d
RH
3456 if (sve_access_check(s)) {
3457 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3458 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3459 vec_full_reg_offset(s, a->rn),
9fff3fcc 3460 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
6e6a157d
RH
3461 }
3462 return true;
3463}
3464
fa4bd72c 3465TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
6e6a157d 3466
3a7be554 3467static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3468{
6e6a157d 3469 if (sve_access_check(s)) {
138a1f7b
RH
3470 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3471 tcg_constant_i64(a->imm), u, d);
6e6a157d
RH
3472 }
3473 return true;
3474}
3475
17b54d1c
RH
3476TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
3477TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
3478TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
3479TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
6e6a157d
RH
3480
3481static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3482{
3483 if (sve_access_check(s)) {
3484 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3485 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3486 vec_full_reg_offset(s, a->rn),
138a1f7b 3487 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
6e6a157d
RH
3488 }
3489 return true;
3490}
3491
3492#define DO_ZZI(NAME, name) \
ef4a3958 3493 static gen_helper_gvec_2i * const name##i_fns[4] = { \
6e6a157d
RH
3494 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3495 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3496 }; \
ef4a3958 3497 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])
6e6a157d
RH
3498
3499DO_ZZI(SMAX, smax)
3500DO_ZZI(UMAX, umax)
3501DO_ZZI(SMIN, smin)
3502DO_ZZI(UMIN, umin)
3503
3504#undef DO_ZZI
3505
5f425b92
RH
3506static gen_helper_gvec_4 * const dot_fns[2][2] = {
3507 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3508 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3509};
3510TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
3511 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
d730ecaa 3512
814d4c52
RH
3513/*
3514 * SVE Multiply - Indexed
3515 */
3516
f3500a25
RH
3517TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3518 gen_helper_gvec_sdot_idx_b, a)
3519TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3520 gen_helper_gvec_sdot_idx_h, a)
3521TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3522 gen_helper_gvec_udot_idx_b, a)
3523TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3524 gen_helper_gvec_udot_idx_h, a)
3525
3526TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3527 gen_helper_gvec_sudot_idx_b, a)
3528TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3529 gen_helper_gvec_usdot_idx_b, a)
16fcfdc7 3530
814d4c52 3531#define DO_SVE2_RRX(NAME, FUNC) \
3532 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3533 a->rd, a->rn, a->rm, a->index)
814d4c52 3534
af031f64
RH
3535DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
3536DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
3537DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)
814d4c52 3538
af031f64
RH
3539DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
3540DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
3541DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
1aee2d70 3542
af031f64
RH
3543DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
3544DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
3545DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
1aee2d70 3546
814d4c52
RH
3547#undef DO_SVE2_RRX
3548
b95f5eeb 3549#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
3550 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3551 a->rd, a->rn, a->rm, (a->index << 1) | TOP)
3552
3553DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
3554DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
3555DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
3556DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
3557
3558DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
3559DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
3560DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
3561DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
3562
3563DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
3564DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
3565DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
3566DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
d3949c4c 3567
3568#undef DO_SVE2_RRX_TB
3569
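/*
 * In DO_SVE2_RRX_TB above, (a->index << 1) | TOP packs two values into the
 * one immediate passed to the helper: bit 0 selects bottom (0) or top (1)
 * source elements and the remaining bits carry the lane index.  A minimal
 * sketch of the unpacking a helper performs (illustrative only):
 *
 *   int data = simd_data(desc);      // e.g. (3 << 1) | 1 == 7: lane 3, top
 *   int sel  = data & 1;
 *   int idx  = data >> 1;
 */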
8a02aac7 3570#define DO_SVE2_RRXR(NAME, FUNC) \
8681eb76 3571 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)
8a02aac7 3572
3573DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
3574DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
3575DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
8a02aac7 3576
3577DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
3578DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
3579DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
8a02aac7 3580
3581DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
3582DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
3583DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
75d6d5fc 3584
3585DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
3586DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
3587DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
75d6d5fc 3588
3589#undef DO_SVE2_RRXR
3590
c5c455d7 3591#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
3592 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
3593 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)
3594
3595DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
3596DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
3597DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
3598DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
3599
3600DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
3601DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
3602DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
3603DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
3604
3605DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
3606DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
3607DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
3608DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
3609
3610DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
3611DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
3612DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
3613DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
3614
3615DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
3616DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
3617DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
3618DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
3619
3620DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
3621DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
3622DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
3623DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
3624
3625#undef DO_SVE2_RRXR_TB
3626
3b787ed8 3627#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
3628 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
3629 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)
3630
3631DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
3632DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)
3633
3634DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
3635DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)
3636
3637DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
3638DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
3639
3640#undef DO_SVE2_RRXR_ROT
3641
3642/*
3643 *** SVE Floating Point Multiply-Add Indexed Group
3644 */
3645
0a82d963 3646static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
ca40a6e6 3647{
3648 static gen_helper_gvec_4_ptr * const fns[4] = {
3649 NULL,
3650 gen_helper_gvec_fmla_idx_h,
3651 gen_helper_gvec_fmla_idx_s,
3652 gen_helper_gvec_fmla_idx_d,
3653 };
3654 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
3655 (a->index << 1) | sub,
3656 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3657}
3658
3659TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
3660TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
0a82d963 3661
3662/*
3663 *** SVE Floating Point Multiply Indexed Group
3664 */
3665
3666static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
3667 NULL, gen_helper_gvec_fmul_idx_h,
3668 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
3669};
3670TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
3671 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
3672 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
ca40a6e6 3673
3674/*
3675 *** SVE Floating Point Fast Reduction Group
3676 */
3677
3678typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3679 TCGv_ptr, TCGv_i32);
3680
5ce18efe 3681static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
3682 gen_helper_fp_reduce *fn)
3683{
3684 unsigned vsz, p2vsz;
3685 TCGv_i32 t_desc;
3686 TCGv_ptr t_zn, t_pg, status;
3687 TCGv_i64 temp;
3688
3689 if (fn == NULL) {
3690 return false;
3691 }
3692 if (!sve_access_check(s)) {
3693 return true;
3694 }
3695
3696 vsz = vec_full_reg_size(s);
3697 p2vsz = pow2ceil(vsz);
3698 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
3699 temp = tcg_temp_new_i64();
3700 t_zn = tcg_temp_new_ptr();
3701 t_pg = tcg_temp_new_ptr();
3702
3703 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3704 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3705 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3706
3707 fn(temp, t_zn, t_pg, status, t_desc);
3708 tcg_temp_free_ptr(t_zn);
3709 tcg_temp_free_ptr(t_pg);
3710 tcg_temp_free_ptr(status);
3711
3712 write_fp_dreg(s, a->rd, temp);
3713 tcg_temp_free_i64(temp);
5ce18efe 3714 return true;
3715}
3716
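/*
 * Note on do_reduce() above: simd_desc() carries the vector length twice
 * (oprsz/maxsz) plus p2vsz = pow2ceil(vsz) in the data field, e.g. a
 * 48-byte vector gives p2vsz == 64, so the helper can perform a pairwise
 * reduction over a power-of-2 element count, with the excess slots filled
 * by the operation's identity value.  (Sketch of intent only; the details
 * live in the sve_*v helpers.)
 */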
3717#define DO_VPZ(NAME, name) \
3718 static gen_helper_fp_reduce * const name##_fns[4] = { \
3719 NULL, gen_helper_sve_##name##_h, \
3720 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
23fbe79f 3721 }; \
8003e7cf 3722 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
3723
3724DO_VPZ(FADDV, faddv)
3725DO_VPZ(FMINNMV, fminnmv)
3726DO_VPZ(FMAXNMV, fmaxnmv)
3727DO_VPZ(FMINV, fminv)
3728DO_VPZ(FMAXV, fmaxv)
3729
3730#undef DO_VPZ
3731
3732/*
3733 *** SVE Floating Point Unary Operations - Unpredicated Group
3734 */
3735
3736static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
3737 NULL, gen_helper_gvec_frecpe_h,
3738 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
3739};
3740TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
3887c038 3741
3742static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
3743 NULL, gen_helper_gvec_frsqrte_h,
3744 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
3745};
3746TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
3887c038 3747
3748/*
3749 *** SVE Floating Point Compare with Zero Group
3750 */
3751
63d6aef8 3752static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3753 gen_helper_gvec_3_ptr *fn)
3754{
3755 if (fn == NULL) {
3756 return false;
3757 }
3758 if (sve_access_check(s)) {
3759 unsigned vsz = vec_full_reg_size(s);
3760 TCGv_ptr status =
3761 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03 3762
3763 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3764 vec_full_reg_offset(s, a->rn),
3765 pred_full_reg_offset(s, a->pg),
3766 status, vsz, vsz, 0, fn);
3767 tcg_temp_free_ptr(status);
3768 }
3769 return true;
3770}
3771
3772#define DO_PPZ(NAME, name) \
3773 static gen_helper_gvec_3_ptr * const name##_fns[] = { \
3774 NULL, gen_helper_sve_##name##_h, \
3775 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
4d2e2a03 3776 }; \
63d6aef8 3777 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])
3778
3779DO_PPZ(FCMGE_ppz0, fcmge0)
3780DO_PPZ(FCMGT_ppz0, fcmgt0)
3781DO_PPZ(FCMLE_ppz0, fcmle0)
3782DO_PPZ(FCMLT_ppz0, fcmlt0)
3783DO_PPZ(FCMEQ_ppz0, fcmeq0)
3784DO_PPZ(FCMNE_ppz0, fcmne0)
3785
3786#undef DO_PPZ
3787
3788/*
3789 *** SVE floating-point trig multiply-add coefficient
3790 */
3791
3792static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
3793 NULL, gen_helper_sve_ftmad_h,
3794 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
3795};
3796TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
3797 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
3798 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
67fcd9ad 3799
3800/*
3801 *** SVE Floating Point Accumulating Reduction Group
3802 */
3803
3a7be554 3804static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3805{
3806 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3807 TCGv_ptr, TCGv_ptr, TCGv_i32);
3808 static fadda_fn * const fns[3] = {
3809 gen_helper_sve_fadda_h,
3810 gen_helper_sve_fadda_s,
3811 gen_helper_sve_fadda_d,
3812 };
3813 unsigned vsz = vec_full_reg_size(s);
3814 TCGv_ptr t_rm, t_pg, t_fpst;
3815 TCGv_i64 t_val;
3816 TCGv_i32 t_desc;
3817
3818 if (a->esz == 0) {
3819 return false;
3820 }
3821 if (!sve_access_check(s)) {
3822 return true;
3823 }
3824
3825 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3826 t_rm = tcg_temp_new_ptr();
3827 t_pg = tcg_temp_new_ptr();
3828 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3829 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3830 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
c6a59b55 3831 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3832
3833 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3834
3835 tcg_temp_free_ptr(t_fpst);
3836 tcg_temp_free_ptr(t_pg);
3837 tcg_temp_free_ptr(t_rm);
3838
3839 write_fp_dreg(s, a->rd, t_val);
3840 tcg_temp_free_i64(t_val);
3841 return true;
3842}
3843
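/*
 * Rough scalar model of what the fadda helpers compute above (sketch only,
 * ignoring the FP status handled via t_fpst):
 *
 *   acc = Vn;                          // t_val, element 0 of Zn
 *   for (i = 0; i < elements; i++) {
 *       if (pg[i]) {
 *           acc = acc + Zm[i];         // strictly in element order
 *       }
 *   }
 *   Vd = acc;                          // written back via write_fp_dreg
 */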
3844/*
3845 *** SVE Floating Point Arithmetic - Unpredicated Group
3846 */
3847
29b80469 3848#define DO_FP3(NAME, name) \
bdd4ce0d 3849 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
3850 NULL, gen_helper_gvec_##name##_h, \
3851 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3852 }; \
bdd4ce0d 3853 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
3854
3855DO_FP3(FADD_zzz, fadd)
3856DO_FP3(FSUB_zzz, fsub)
3857DO_FP3(FMUL_zzz, fmul)
3858DO_FP3(FTSMUL, ftsmul)
3859DO_FP3(FRECPS, recps)
3860DO_FP3(FRSQRTS, rsqrts)
3861
3862#undef DO_FP3
3863
3864/*
3865 *** SVE Floating Point Arithmetic - Predicated Group
3866 */
3867
3868#define DO_ZPZZ_FP(NAME, FEAT, name) \
3869 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
3870 NULL, gen_helper_##name##_h, \
3871 gen_helper_##name##_s, gen_helper_##name##_d \
3872 }; \
3873 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
3874
3875DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
3876DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
3877DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
3878DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
3879DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
3880DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
3881DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
3882DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
3883DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
3884DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
3885DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
8092c6a3 3886
3887typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3888 TCGv_i64, TCGv_ptr, TCGv_i32);
3889
3890static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3891 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3892{
3893 unsigned vsz = vec_full_reg_size(s);
3894 TCGv_ptr t_zd, t_zn, t_pg, status;
3895 TCGv_i32 desc;
3896
3897 t_zd = tcg_temp_new_ptr();
3898 t_zn = tcg_temp_new_ptr();
3899 t_pg = tcg_temp_new_ptr();
3900 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3901 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3902 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3903
cdfb22bb 3904 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
c6a59b55 3905 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3906 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3907
3908 tcg_temp_free_ptr(status);
3909 tcg_temp_free_ptr(t_pg);
3910 tcg_temp_free_ptr(t_zn);
3911 tcg_temp_free_ptr(t_zd);
3912}
3913
413ee8e4 3914static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3915 gen_helper_sve_fp2scalar *fn)
3916{
3917 if (fn == NULL) {
3918 return false;
3919 }
3920 if (sve_access_check(s)) {
3921 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
3922 tcg_constant_i64(imm), fn);
3923 }
3924 return true;
3925}
3926
3927#define DO_FP_IMM(NAME, name, const0, const1) \
3928 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
3929 NULL, gen_helper_sve_##name##_h, \
3930 gen_helper_sve_##name##_s, \
3931 gen_helper_sve_##name##_d \
3932 }; \
3933 static uint64_t const name##_const[4][2] = { \
3934 { -1, -1 }, \
3935 { float16_##const0, float16_##const1 }, \
3936 { float32_##const0, float32_##const1 }, \
3937 { float64_##const0, float64_##const1 }, \
3938 }; \
3939 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
3940 name##_const[a->esz][a->imm], name##_fns[a->esz])
cc48affe 3941
3942DO_FP_IMM(FADD, fadds, half, one)
3943DO_FP_IMM(FSUB, fsubs, half, one)
3944DO_FP_IMM(FMUL, fmuls, half, two)
3945DO_FP_IMM(FSUBR, fsubrs, half, one)
3946DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3947DO_FP_IMM(FMINNM, fminnms, zero, one)
3948DO_FP_IMM(FMAX, fmaxs, zero, one)
3949DO_FP_IMM(FMIN, fmins, zero, one)
3950
3951#undef DO_FP_IMM
3952
3953static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3954 gen_helper_gvec_4_ptr *fn)
3955{
3956 if (fn == NULL) {
3957 return false;
3958 }
3959 if (sve_access_check(s)) {
3960 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3961 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3962 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3963 vec_full_reg_offset(s, a->rn),
3964 vec_full_reg_offset(s, a->rm),
3965 pred_full_reg_offset(s, a->pg),
3966 status, vsz, vsz, 0, fn);
3967 tcg_temp_free_ptr(status);
3968 }
3969 return true;
3970}
3971
3972#define DO_FPCMP(NAME, name) \
d961b3e4 3973 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \
3974 NULL, gen_helper_sve_##name##_h, \
3975 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3976 }; \
d961b3e4 3977 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])
3978
3979DO_FPCMP(FCMGE, fcmge)
3980DO_FPCMP(FCMGT, fcmgt)
3981DO_FPCMP(FCMEQ, fcmeq)
3982DO_FPCMP(FCMNE, fcmne)
3983DO_FPCMP(FCMUO, fcmuo)
3984DO_FPCMP(FACGE, facge)
3985DO_FPCMP(FACGT, facgt)
3986
3987#undef DO_FPCMP
3988
3989static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
3990 NULL, gen_helper_sve_fcadd_h,
3991 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
3992};
3993TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
3994 a->rd, a->rn, a->rm, a->pg, a->rot,
3995 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
76a9d9cd 3996
6ceabaad 3997#define DO_FMLA(NAME, name) \
3998 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
3999 NULL, gen_helper_sve_##name##_h, \
4000 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4001 }; \
4002 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
4003 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
4004 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4005
4006DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4007DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4008DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4009DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4010
4011#undef DO_FMLA
4012
4013static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
4014 NULL, gen_helper_sve_fcmla_zpzzz_h,
4015 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
4016};
4017TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
4018 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
4019 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
05f48bab 4020
4021static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
4022 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
4023};
4024TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
4025 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
4026 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
18fc2405 4027
4028/*
4029 *** SVE Floating Point Unary Operations Predicated Group
4030 */
4031
4032TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4033 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
4034TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4035 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)
4036
4037TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
4038 gen_helper_sve_bfcvt, a, 0, FPST_FPCR)
4039
4040TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4041 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
4042TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4043 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
4044TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4045 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
4046TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4047 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)
4048
4049TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4050 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
4051TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4052 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
4053TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4054 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
4055TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4056 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
4057TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4058 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
4059TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4060 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)
4061
4062TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4063 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
4064TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4065 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
4066TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4067 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
4068TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4069 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
4070TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4071 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
4072TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4073 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)
4074
4075TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4076 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
4077TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4078 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
df4de1af 4079
4080static gen_helper_gvec_3_ptr * const frint_fns[] = {
4081 NULL,
4082 gen_helper_sve_frint_h,
4083 gen_helper_sve_frint_s,
4084 gen_helper_sve_frint_d
4085};
4086TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
4087 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
cda3c753 4088
4089static gen_helper_gvec_3_ptr * const frintx_fns[] = {
4090 NULL,
4091 gen_helper_sve_frintx_h,
4092 gen_helper_sve_frintx_s,
4093 gen_helper_sve_frintx_d
4094};
4095TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
4096           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
cda3c753 4097
4098static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4099 int mode, gen_helper_gvec_3_ptr *fn)
cda3c753 4100{
4101 unsigned vsz;
4102 TCGv_i32 tmode;
4103 TCGv_ptr status;
cda3c753 4104
4105 if (fn == NULL) {
4106 return false;
4107 }
4108 if (!sve_access_check(s)) {
4109 return true;
4110 }
cda3c753 4111
4112 vsz = vec_full_reg_size(s);
4113 tmode = tcg_const_i32(mode);
4114 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753 4115
4116 gen_helper_set_rmode(tmode, tmode, status);
4117
4118 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4119 vec_full_reg_offset(s, a->rn),
4120 pred_full_reg_offset(s, a->pg),
4121 status, vsz, vsz, 0, fn);
4122
4123 gen_helper_set_rmode(tmode, tmode, status);
4124 tcg_temp_free_i32(tmode);
4125 tcg_temp_free_ptr(status);
4126 return true;
4127}
4128
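/*
 * The paired gen_helper_set_rmode() calls in do_frint_mode() form a
 * save/restore: the helper installs the requested rounding mode and writes
 * the previous one back into its destination, so the sequence is
 * effectively (sketch):
 *
 *   old = set_rmode(mode, status);     // first call
 *   ...rounded operation...
 *   set_rmode(old, status);            // second call restores the mode
 */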
4129TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
4130 float_round_nearest_even, frint_fns[a->esz])
4131TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
4132 float_round_up, frint_fns[a->esz])
4133TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
4134 float_round_down, frint_fns[a->esz])
4135TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
4136 float_round_to_zero, frint_fns[a->esz])
4137TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
4138 float_round_ties_away, frint_fns[a->esz])
cda3c753 4139
4140static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
4141 NULL, gen_helper_sve_frecpx_h,
4142 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
4143};
4144TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
4145 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
8092c6a3 4146
4147static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
4148 NULL, gen_helper_sve_fsqrt_h,
4149 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
4150};
4151TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
4152 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4153
4154TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4155 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
4156TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4157 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
4158TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4159 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)
4160
4161TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4162 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
4163TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4164 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)
4165
4166TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4167 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
4168TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4169 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)
4170
4171TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4172 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
4173TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4174 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
4175TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4176 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)
4177
4178TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4179 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
4180TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4181 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
4182TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4183 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)
4184
4185TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4186 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
8092c6a3 4187
4188/*
4189 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4190 */
4191
4192/* Subroutine loading a vector register at VOFS of LEN bytes.
4193 * The load should begin at the address Rn + IMM.
4194 */
4195
19f2acc9 4196static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4197{
4198 int len_align = QEMU_ALIGN_DOWN(len, 8);
4199 int len_remain = len % 8;
4200 int nparts = len / 8 + ctpop8(len_remain);
d1822297 4201 int midx = get_mem_index(s);
b2aa8879 4202 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4203
4204 dirty_addr = tcg_temp_new_i64();
4205 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4206 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
b2aa8879 4207 tcg_temp_free_i64(dirty_addr);
d1822297 4208
4209 /*
4210 * Note that unpredicated load/store of vector/predicate registers
d1822297 4211 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4212 * operations on larger quantities.
4213 * Attempt to keep code expansion to a minimum by limiting the
4214 * amount of unrolling done.
4215 */
4216 if (nparts <= 4) {
4217 int i;
4218
b2aa8879 4219 t0 = tcg_temp_new_i64();
d1822297 4220 for (i = 0; i < len_align; i += 8) {
fc313c64 4221 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
d1822297 4222 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 4223 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4224 }
b2aa8879 4225 tcg_temp_free_i64(t0);
4226 } else {
4227 TCGLabel *loop = gen_new_label();
4228 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4229
4230 /* Copy the clean address into a local temp, live across the loop. */
4231 t0 = clean_addr;
4b4dc975 4232 clean_addr = new_tmp_a64_local(s);
b2aa8879 4233 tcg_gen_mov_i64(clean_addr, t0);
d1822297 4234
b2aa8879 4235 gen_set_label(loop);
d1822297 4236
b2aa8879 4237 t0 = tcg_temp_new_i64();
fc313c64 4238 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
b2aa8879 4239 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4240
b2aa8879 4241 tp = tcg_temp_new_ptr();
4242 tcg_gen_add_ptr(tp, cpu_env, i);
4243 tcg_gen_addi_ptr(i, i, 8);
4244 tcg_gen_st_i64(t0, tp, vofs);
4245 tcg_temp_free_ptr(tp);
b2aa8879 4246 tcg_temp_free_i64(t0);
4247
4248 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4249 tcg_temp_free_ptr(i);
4250 }
4251
4252 /*
4253 * Predicate register loads can be any multiple of 2.
4254 * Note that we still store the entire 64-bit unit into cpu_env.
4255 */
4256 if (len_remain) {
b2aa8879 4257 t0 = tcg_temp_new_i64();
4258 switch (len_remain) {
4259 case 2:
4260 case 4:
4261 case 8:
4262 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4263 MO_LE | ctz32(len_remain));
4264 break;
4265
4266 case 6:
4267 t1 = tcg_temp_new_i64();
4268 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4269 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4270 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
4271 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4272 tcg_temp_free_i64(t1);
4273 break;
4274
4275 default:
4276 g_assert_not_reached();
4277 }
4278 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 4279 tcg_temp_free_i64(t0);
d1822297 4280 }
4281}
4282
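/*
 * Worked example of the length split in do_ldr(): an LDR of a 6-byte
 * predicate (VL = 384 bits) has len_align = 0, len_remain = 6 and
 * nparts = 0 + ctpop8(6) = 2, so the unrolled path runs no 8-byte loads
 * and the remainder case loads 4 bytes followed by 2 bytes.
 */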
5047c204 4283/* Similarly for stores. */
19f2acc9 4284static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 4285{
4286 int len_align = QEMU_ALIGN_DOWN(len, 8);
4287 int len_remain = len % 8;
4288 int nparts = len / 8 + ctpop8(len_remain);
5047c204 4289 int midx = get_mem_index(s);
bba87d0a 4290 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 4291
4292 dirty_addr = tcg_temp_new_i64();
4293 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4294 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
bba87d0a 4295 tcg_temp_free_i64(dirty_addr);
4296
4297 /* Note that unpredicated load/store of vector/predicate registers
4298 * are defined as a stream of bytes, which equates to little-endian
4299 * operations on larger quantities. There is no nice way to force
4300 * a little-endian store for aarch64_be-linux-user out of line.
4301 *
4302 * Attempt to keep code expansion to a minimum by limiting the
4303 * amount of unrolling done.
4304 */
4305 if (nparts <= 4) {
4306 int i;
4307
bba87d0a 4308 t0 = tcg_temp_new_i64();
4309 for (i = 0; i < len_align; i += 8) {
4310 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
fc313c64 4311 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
d8227b09 4312 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 4313 }
bba87d0a 4314 tcg_temp_free_i64(t0);
4315 } else {
4316 TCGLabel *loop = gen_new_label();
bba87d0a 4317 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 4318
4319 /* Copy the clean address into a local temp, live across the loop. */
4320 t0 = clean_addr;
4b4dc975 4321 clean_addr = new_tmp_a64_local(s);
bba87d0a 4322 tcg_gen_mov_i64(clean_addr, t0);
5047c204 4323
bba87d0a 4324 gen_set_label(loop);
5047c204 4325
4326 t0 = tcg_temp_new_i64();
4327 tp = tcg_temp_new_ptr();
4328 tcg_gen_add_ptr(tp, cpu_env, i);
4329 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 4330 tcg_gen_addi_ptr(i, i, 8);
4331 tcg_temp_free_ptr(tp);
4332
fc313c64 4333 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
4334 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4335 tcg_temp_free_i64(t0);
4336
4337 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4338 tcg_temp_free_ptr(i);
4339 }
4340
4341 /* Predicate register stores can be any multiple of 2. */
4342 if (len_remain) {
bba87d0a 4343 t0 = tcg_temp_new_i64();
5047c204 4344 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4345
4346 switch (len_remain) {
4347 case 2:
4348 case 4:
4349 case 8:
4350 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4351 MO_LE | ctz32(len_remain));
4352 break;
4353
4354 case 6:
4355 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4356 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 4357 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 4358 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
4359 break;
4360
4361 default:
4362 g_assert_not_reached();
4363 }
bba87d0a 4364 tcg_temp_free_i64(t0);
5047c204 4365 }
4366}
4367
3a7be554 4368static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4369{
4370 if (sve_access_check(s)) {
4371 int size = vec_full_reg_size(s);
4372 int off = vec_full_reg_offset(s, a->rd);
4373 do_ldr(s, off, size, a->rn, a->imm * size);
4374 }
4375 return true;
4376}
4377
3a7be554 4378static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4379{
4380 if (sve_access_check(s)) {
4381 int size = pred_full_reg_size(s);
4382 int off = pred_full_reg_offset(s, a->rd);
4383 do_ldr(s, off, size, a->rn, a->imm * size);
4384 }
4385 return true;
4386}
c4e7c493 4387
3a7be554 4388static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4389{
4390 if (sve_access_check(s)) {
4391 int size = vec_full_reg_size(s);
4392 int off = vec_full_reg_offset(s, a->rd);
4393 do_str(s, off, size, a->rn, a->imm * size);
4394 }
4395 return true;
4396}
4397
3a7be554 4398static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4399{
4400 if (sve_access_check(s)) {
4401 int size = pred_full_reg_size(s);
4402 int off = pred_full_reg_offset(s, a->rd);
4403 do_str(s, off, size, a->rn, a->imm * size);
4404 }
4405 return true;
4406}
4407
4408/*
4409 *** SVE Memory - Contiguous Load Group
4410 */
4411
4412/* The memory mode of the dtype. */
14776ab5 4413static const MemOp dtype_mop[16] = {
4414 MO_UB, MO_UB, MO_UB, MO_UB,
4415 MO_SL, MO_UW, MO_UW, MO_UW,
4416 MO_SW, MO_SW, MO_UL, MO_UL,
fc313c64 4417 MO_SB, MO_SB, MO_SB, MO_UQ
4418};
4419
4420#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4421
4422/* The vector element size of dtype. */
4423static const uint8_t dtype_esz[16] = {
4424 0, 1, 2, 3,
4425 3, 1, 2, 3,
4426 3, 2, 2, 3,
4427 3, 2, 1, 3
4428};
4429
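/*
 * Worked example of the two tables: dtype == 4 has dtype_mop[4] == MO_SL
 * and dtype_esz[4] == 3, i.e. a sign-extended 32-bit access filling 64-bit
 * vector elements -- the LD1SW form, handled by the sve_ld1sds_* helpers
 * below.
 */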
4430static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4431 int dtype, uint32_t mte_n, bool is_write,
4432 gen_helper_gvec_mem *fn)
4433{
4434 unsigned vsz = vec_full_reg_size(s);
4435 TCGv_ptr t_pg;
206adacf 4436 int desc = 0;
c4e7c493 4437
4438 /*
4439 * For e.g. LD4, there are not enough arguments to pass all 4
4440 * registers as pointers, so encode the regno into the data field.
4441 * For consistency, do this even for LD1.
4442 */
9473d0ec 4443 if (s->mte_active[0]) {
4444 int msz = dtype_msz(dtype);
4445
4446 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4447 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4448 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4449 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 4450 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
206adacf 4451 desc <<= SVE_MTEDESC_SHIFT;
4452 } else {
4453 addr = clean_data_tbi(s, addr);
206adacf 4454 }
9473d0ec 4455
206adacf 4456 desc = simd_desc(vsz, vsz, zt | desc);
4457 t_pg = tcg_temp_new_ptr();
4458
4459 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
c6a59b55 4460 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));
4461
4462 tcg_temp_free_ptr(t_pg);
4463}
4464
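/*
 * Sketch of the descriptor assembled in do_mem_zpa() (field positions come
 * from MTEDESC, SVE_MTEDESC_SHIFT and simd_desc(), not restated here): the
 * simd_desc() data field carries zt in its low bits and, when MTE is
 * active, the mem_index/TBI/TCMA/WRITE/SIZEM1 fields shifted up by
 * SVE_MTEDESC_SHIFT; with MTE inactive those bits stay zero and the
 * address has already been cleaned with clean_data_tbi().
 */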
4465/* Indexed by [mte][be][dtype][nreg] */
4466static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
4467 { /* mte inactive, little-endian */
4468 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4469 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4470 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4471 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4472 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4473
4474 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4475 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4476 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4477 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4478 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4479
4480 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4481 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4482 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4483 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4484 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4485
4486 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4487 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4488 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4489 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4490 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4491
4492 /* mte inactive, big-endian */
4493 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4494 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4495 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4496 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4497 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4498
4499 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4500 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4501 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4502 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4503 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4504
4505 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4506 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4507 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4508 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4509 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4510
4511 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4512 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4513 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4514 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4515 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4516
4517 { /* mte active, little-endian */
4518 { { gen_helper_sve_ld1bb_r_mte,
4519 gen_helper_sve_ld2bb_r_mte,
4520 gen_helper_sve_ld3bb_r_mte,
4521 gen_helper_sve_ld4bb_r_mte },
4522 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4523 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4524 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4525
4526 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4527 { gen_helper_sve_ld1hh_le_r_mte,
4528 gen_helper_sve_ld2hh_le_r_mte,
4529 gen_helper_sve_ld3hh_le_r_mte,
4530 gen_helper_sve_ld4hh_le_r_mte },
4531 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4532 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4533
4534 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4535 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4536 { gen_helper_sve_ld1ss_le_r_mte,
4537 gen_helper_sve_ld2ss_le_r_mte,
4538 gen_helper_sve_ld3ss_le_r_mte,
4539 gen_helper_sve_ld4ss_le_r_mte },
4540 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4541
4542 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4543 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4544 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4545 { gen_helper_sve_ld1dd_le_r_mte,
4546 gen_helper_sve_ld2dd_le_r_mte,
4547 gen_helper_sve_ld3dd_le_r_mte,
4548 gen_helper_sve_ld4dd_le_r_mte } },
4549
4550 /* mte active, big-endian */
4551 { { gen_helper_sve_ld1bb_r_mte,
4552 gen_helper_sve_ld2bb_r_mte,
4553 gen_helper_sve_ld3bb_r_mte,
4554 gen_helper_sve_ld4bb_r_mte },
4555 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4556 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4557 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4558
4559 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4560 { gen_helper_sve_ld1hh_be_r_mte,
4561 gen_helper_sve_ld2hh_be_r_mte,
4562 gen_helper_sve_ld3hh_be_r_mte,
4563 gen_helper_sve_ld4hh_be_r_mte },
4564 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4565 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4566
4567 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4568 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4569 { gen_helper_sve_ld1ss_be_r_mte,
4570 gen_helper_sve_ld2ss_be_r_mte,
4571 gen_helper_sve_ld3ss_be_r_mte,
4572 gen_helper_sve_ld4ss_be_r_mte },
4573 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4574
4575 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4576 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4577 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4578 { gen_helper_sve_ld1dd_be_r_mte,
4579 gen_helper_sve_ld2dd_be_r_mte,
4580 gen_helper_sve_ld3dd_be_r_mte,
4581 gen_helper_sve_ld4dd_be_r_mte } } },
4582};
4583
4584static void do_ld_zpa(DisasContext *s, int zt, int pg,
4585 TCGv_i64 addr, int dtype, int nreg)
4586{
206adacf 4587 gen_helper_gvec_mem *fn
c182c6db 4588 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 4589
4590 /*
4591 * While there are holes in the table, they are not
4592 * accessible via the instruction encoding.
4593 */
4594 assert(fn != NULL);
206adacf 4595 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
4596}
4597
3a7be554 4598static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4599{
4600 if (a->rm == 31) {
4601 return false;
4602 }
4603 if (sve_access_check(s)) {
4604 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4605 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4606 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4607 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4608 }
4609 return true;
4610}
4611
3a7be554 4612static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
4613{
4614 if (sve_access_check(s)) {
4615 int vsz = vec_full_reg_size(s);
4616 int elements = vsz >> dtype_esz[a->dtype];
4617 TCGv_i64 addr = new_tmp_a64(s);
4618
4619 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4620 (a->imm * elements * (a->nreg + 1))
4621 << dtype_msz(a->dtype));
4622 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4623 }
4624 return true;
4625}
e2654d75 4626
3a7be554 4627static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 4628{
4629 static gen_helper_gvec_mem * const fns[2][2][16] = {
4630 { /* mte inactive, little-endian */
4631 { gen_helper_sve_ldff1bb_r,
4632 gen_helper_sve_ldff1bhu_r,
4633 gen_helper_sve_ldff1bsu_r,
4634 gen_helper_sve_ldff1bdu_r,
4635
4636 gen_helper_sve_ldff1sds_le_r,
4637 gen_helper_sve_ldff1hh_le_r,
4638 gen_helper_sve_ldff1hsu_le_r,
4639 gen_helper_sve_ldff1hdu_le_r,
4640
4641 gen_helper_sve_ldff1hds_le_r,
4642 gen_helper_sve_ldff1hss_le_r,
4643 gen_helper_sve_ldff1ss_le_r,
4644 gen_helper_sve_ldff1sdu_le_r,
4645
4646 gen_helper_sve_ldff1bds_r,
4647 gen_helper_sve_ldff1bss_r,
4648 gen_helper_sve_ldff1bhs_r,
4649 gen_helper_sve_ldff1dd_le_r },
4650
4651 /* mte inactive, big-endian */
4652 { gen_helper_sve_ldff1bb_r,
4653 gen_helper_sve_ldff1bhu_r,
4654 gen_helper_sve_ldff1bsu_r,
4655 gen_helper_sve_ldff1bdu_r,
4656
4657 gen_helper_sve_ldff1sds_be_r,
4658 gen_helper_sve_ldff1hh_be_r,
4659 gen_helper_sve_ldff1hsu_be_r,
4660 gen_helper_sve_ldff1hdu_be_r,
4661
4662 gen_helper_sve_ldff1hds_be_r,
4663 gen_helper_sve_ldff1hss_be_r,
4664 gen_helper_sve_ldff1ss_be_r,
4665 gen_helper_sve_ldff1sdu_be_r,
4666
4667 gen_helper_sve_ldff1bds_r,
4668 gen_helper_sve_ldff1bss_r,
4669 gen_helper_sve_ldff1bhs_r,
4670 gen_helper_sve_ldff1dd_be_r } },
4671
4672 { /* mte active, little-endian */
4673 { gen_helper_sve_ldff1bb_r_mte,
4674 gen_helper_sve_ldff1bhu_r_mte,
4675 gen_helper_sve_ldff1bsu_r_mte,
4676 gen_helper_sve_ldff1bdu_r_mte,
4677
4678 gen_helper_sve_ldff1sds_le_r_mte,
4679 gen_helper_sve_ldff1hh_le_r_mte,
4680 gen_helper_sve_ldff1hsu_le_r_mte,
4681 gen_helper_sve_ldff1hdu_le_r_mte,
4682
4683 gen_helper_sve_ldff1hds_le_r_mte,
4684 gen_helper_sve_ldff1hss_le_r_mte,
4685 gen_helper_sve_ldff1ss_le_r_mte,
4686 gen_helper_sve_ldff1sdu_le_r_mte,
4687
4688 gen_helper_sve_ldff1bds_r_mte,
4689 gen_helper_sve_ldff1bss_r_mte,
4690 gen_helper_sve_ldff1bhs_r_mte,
4691 gen_helper_sve_ldff1dd_le_r_mte },
4692
4693 /* mte active, big-endian */
4694 { gen_helper_sve_ldff1bb_r_mte,
4695 gen_helper_sve_ldff1bhu_r_mte,
4696 gen_helper_sve_ldff1bsu_r_mte,
4697 gen_helper_sve_ldff1bdu_r_mte,
4698
4699 gen_helper_sve_ldff1sds_be_r_mte,
4700 gen_helper_sve_ldff1hh_be_r_mte,
4701 gen_helper_sve_ldff1hsu_be_r_mte,
4702 gen_helper_sve_ldff1hdu_be_r_mte,
4703
4704 gen_helper_sve_ldff1hds_be_r_mte,
4705 gen_helper_sve_ldff1hss_be_r_mte,
4706 gen_helper_sve_ldff1ss_be_r_mte,
4707 gen_helper_sve_ldff1sdu_be_r_mte,
4708
4709 gen_helper_sve_ldff1bds_r_mte,
4710 gen_helper_sve_ldff1bss_r_mte,
4711 gen_helper_sve_ldff1bhs_r_mte,
4712 gen_helper_sve_ldff1dd_be_r_mte } },
4713 };
4714
4715 if (sve_access_check(s)) {
4716 TCGv_i64 addr = new_tmp_a64(s);
4717 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4718 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4719 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4720 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
4721 }
4722 return true;
4723}
4724
3a7be554 4725static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 4726{
4727 static gen_helper_gvec_mem * const fns[2][2][16] = {
4728 { /* mte inactive, little-endian */
4729 { gen_helper_sve_ldnf1bb_r,
4730 gen_helper_sve_ldnf1bhu_r,
4731 gen_helper_sve_ldnf1bsu_r,
4732 gen_helper_sve_ldnf1bdu_r,
4733
4734 gen_helper_sve_ldnf1sds_le_r,
4735 gen_helper_sve_ldnf1hh_le_r,
4736 gen_helper_sve_ldnf1hsu_le_r,
4737 gen_helper_sve_ldnf1hdu_le_r,
4738
4739 gen_helper_sve_ldnf1hds_le_r,
4740 gen_helper_sve_ldnf1hss_le_r,
4741 gen_helper_sve_ldnf1ss_le_r,
4742 gen_helper_sve_ldnf1sdu_le_r,
4743
4744 gen_helper_sve_ldnf1bds_r,
4745 gen_helper_sve_ldnf1bss_r,
4746 gen_helper_sve_ldnf1bhs_r,
4747 gen_helper_sve_ldnf1dd_le_r },
4748
4749 /* mte inactive, big-endian */
4750 { gen_helper_sve_ldnf1bb_r,
4751 gen_helper_sve_ldnf1bhu_r,
4752 gen_helper_sve_ldnf1bsu_r,
4753 gen_helper_sve_ldnf1bdu_r,
4754
4755 gen_helper_sve_ldnf1sds_be_r,
4756 gen_helper_sve_ldnf1hh_be_r,
4757 gen_helper_sve_ldnf1hsu_be_r,
4758 gen_helper_sve_ldnf1hdu_be_r,
4759
4760 gen_helper_sve_ldnf1hds_be_r,
4761 gen_helper_sve_ldnf1hss_be_r,
4762 gen_helper_sve_ldnf1ss_be_r,
4763 gen_helper_sve_ldnf1sdu_be_r,
4764
4765 gen_helper_sve_ldnf1bds_r,
4766 gen_helper_sve_ldnf1bss_r,
4767 gen_helper_sve_ldnf1bhs_r,
4768 gen_helper_sve_ldnf1dd_be_r } },
4769
4770    { /* mte active, little-endian */
4771 { gen_helper_sve_ldnf1bb_r_mte,
4772 gen_helper_sve_ldnf1bhu_r_mte,
4773 gen_helper_sve_ldnf1bsu_r_mte,
4774 gen_helper_sve_ldnf1bdu_r_mte,
4775
4776 gen_helper_sve_ldnf1sds_le_r_mte,
4777 gen_helper_sve_ldnf1hh_le_r_mte,
4778 gen_helper_sve_ldnf1hsu_le_r_mte,
4779 gen_helper_sve_ldnf1hdu_le_r_mte,
4780
4781 gen_helper_sve_ldnf1hds_le_r_mte,
4782 gen_helper_sve_ldnf1hss_le_r_mte,
4783 gen_helper_sve_ldnf1ss_le_r_mte,
4784 gen_helper_sve_ldnf1sdu_le_r_mte,
4785
4786 gen_helper_sve_ldnf1bds_r_mte,
4787 gen_helper_sve_ldnf1bss_r_mte,
4788 gen_helper_sve_ldnf1bhs_r_mte,
4789 gen_helper_sve_ldnf1dd_le_r_mte },
4790
4791      /* mte active, big-endian */
4792 { gen_helper_sve_ldnf1bb_r_mte,
4793 gen_helper_sve_ldnf1bhu_r_mte,
4794 gen_helper_sve_ldnf1bsu_r_mte,
4795 gen_helper_sve_ldnf1bdu_r_mte,
4796
4797 gen_helper_sve_ldnf1sds_be_r_mte,
4798 gen_helper_sve_ldnf1hh_be_r_mte,
4799 gen_helper_sve_ldnf1hsu_be_r_mte,
4800 gen_helper_sve_ldnf1hdu_be_r_mte,
4801
4802 gen_helper_sve_ldnf1hds_be_r_mte,
4803 gen_helper_sve_ldnf1hss_be_r_mte,
4804 gen_helper_sve_ldnf1ss_be_r_mte,
4805 gen_helper_sve_ldnf1sdu_be_r_mte,
4806
4807 gen_helper_sve_ldnf1bds_r_mte,
4808 gen_helper_sve_ldnf1bss_r_mte,
4809 gen_helper_sve_ldnf1bhs_r_mte,
4810 gen_helper_sve_ldnf1dd_be_r_mte } },
4811 };
4812
4813 if (sve_access_check(s)) {
4814 int vsz = vec_full_reg_size(s);
4815 int elements = vsz >> dtype_esz[a->dtype];
4816 int off = (a->imm * elements) << dtype_msz(a->dtype);
4817 TCGv_i64 addr = new_tmp_a64(s);
4818
4819 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4820 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4821 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
4822 }
4823 return true;
4824}
1a039c7e 4825
c182c6db 4826static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
05abe304 4827{
4828 unsigned vsz = vec_full_reg_size(s);
4829 TCGv_ptr t_pg;
7924d239 4830 int poff;
4831
4832 /* Load the first quadword using the normal predicated load helpers. */
4833 poff = pred_full_reg_offset(s, pg);
4834 if (vsz > 16) {
4835 /*
4836 * Zero-extend the first 16 bits of the predicate into a temporary.
4837 * This avoids triggering an assert making sure we don't have bits
4838 * set within a predicate beyond VQ, but we have lowered VQ to 1
4839 * for this load operation.
4840 */
4841 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 4842#if HOST_BIG_ENDIAN
4843 poff += 6;
4844#endif
4845 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4846
4847 poff = offsetof(CPUARMState, vfp.preg_tmp);
4848 tcg_gen_st_i64(tmp, cpu_env, poff);
4849 tcg_temp_free_i64(tmp);
4850 }
4851
05abe304 4852 t_pg = tcg_temp_new_ptr();
2a99ab2b 4853 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 4854
4855 gen_helper_gvec_mem *fn
4856 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
7924d239 4857 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
4858
4859 tcg_temp_free_ptr(t_pg);
4860
4861 /* Replicate that first quadword. */
4862 if (vsz > 16) {
4863 int doff = vec_full_reg_offset(s, zt);
4864 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
4865 }
4866}
4867
3a7be554 4868static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4869{
4870 if (a->rm == 31) {
4871 return false;
4872 }
4873 if (sve_access_check(s)) {
4874 int msz = dtype_msz(a->dtype);
4875 TCGv_i64 addr = new_tmp_a64(s);
4876 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4877 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 4878 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
4879 }
4880 return true;
4881}
4882
3a7be554 4883static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4884{
4885 if (sve_access_check(s)) {
4886 TCGv_i64 addr = new_tmp_a64(s);
4887 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 4888 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
4889 }
4890 return true;
4891}
4892
4893static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
4894{
4895 unsigned vsz = vec_full_reg_size(s);
4896 unsigned vsz_r32;
4897 TCGv_ptr t_pg;
4898 int poff, doff;
4899
4900 if (vsz < 32) {
4901 /*
4902 * Note that this UNDEFINED check comes after CheckSVEEnabled()
4903 * in the ARM pseudocode, which is the sve_access_check() done
4904 * in our caller. We should not now return false from the caller.
4905 */
4906 unallocated_encoding(s);
4907 return;
4908 }
4909
4910 /* Load the first octaword using the normal predicated load helpers. */
4911
4912 poff = pred_full_reg_offset(s, pg);
4913 if (vsz > 32) {
4914 /*
4915 * Zero-extend the first 32 bits of the predicate into a temporary.
4916 * This avoids triggering an assert making sure we don't have bits
4917 * set within a predicate beyond VQ, but we have lowered VQ to 2
4918 * for this load operation.
4919 */
4920 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 4921#if HOST_BIG_ENDIAN
4922 poff += 4;
4923#endif
4924 tcg_gen_ld32u_i64(tmp, cpu_env, poff);
4925
4926 poff = offsetof(CPUARMState, vfp.preg_tmp);
4927 tcg_gen_st_i64(tmp, cpu_env, poff);
4928 tcg_temp_free_i64(tmp);
4929 }
4930
4931 t_pg = tcg_temp_new_ptr();
4932 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4933
4934 gen_helper_gvec_mem *fn
4935 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
4936 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
4937
4938 tcg_temp_free_ptr(t_pg);
4939
4940 /*
4941 * Replicate that first octaword.
4942 * The replication happens in units of 32; if the full vector size
4943 * is not a multiple of 32, the final bits are zeroed.
4944 */
4945 doff = vec_full_reg_offset(s, zt);
4946 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
4947 if (vsz >= 64) {
4948 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
4949 }
4950 vsz -= vsz_r32;
4951 if (vsz) {
4952 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
4953 }
4954}
4955
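/*
 * Worked example for the replication in do_ldro(): with vsz == 48 bytes,
 * vsz_r32 = QEMU_ALIGN_DOWN(48, 32) == 32, so the dup_mem pass is skipped
 * (vsz < 64) and the final 16 bytes of the register are cleared by the
 * dup_imm of zero, matching the "final bits are zeroed" comment above.
 */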
4956static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
4957{
4958 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
4959 return false;
4960 }
4961 if (a->rm == 31) {
4962 return false;
4963 }
4964 if (sve_access_check(s)) {
4965 TCGv_i64 addr = new_tmp_a64(s);
4966 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4967 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4968 do_ldro(s, a->rd, a->pg, addr, a->dtype);
4969 }
4970 return true;
4971}
4972
4973static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
4974{
4975 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
4976 return false;
4977 }
4978 if (sve_access_check(s)) {
4979 TCGv_i64 addr = new_tmp_a64(s);
4980 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
4981 do_ldro(s, a->rd, a->pg, addr, a->dtype);
4982 }
4983 return true;
4984}
4985
68459864 4986/* Load and broadcast element. */
3a7be554 4987static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 4988{
4989 unsigned vsz = vec_full_reg_size(s);
4990 unsigned psz = pred_full_reg_size(s);
4991 unsigned esz = dtype_esz[a->dtype];
d0e372b0 4992 unsigned msz = dtype_msz(a->dtype);
c0ed9166 4993 TCGLabel *over;
4ac430e1 4994 TCGv_i64 temp, clean_addr;
68459864 4995
4996 if (!sve_access_check(s)) {
4997 return true;
4998 }
4999
5000 over = gen_new_label();
5001
5002 /* If the guarding predicate has no bits set, no load occurs. */
5003 if (psz <= 8) {
5004 /* Reduce the pred_esz_masks value simply to reduce the
5005 * size of the code generated here.
5006 */
5007 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5008 temp = tcg_temp_new_i64();
5009 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5010 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5011 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5012 tcg_temp_free_i64(temp);
5013 } else {
5014 TCGv_i32 t32 = tcg_temp_new_i32();
5015 find_last_active(s, t32, esz, a->pg);
5016 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5017 tcg_temp_free_i32(t32);
5018 }
5019
5020 /* Load the data. */
5021 temp = tcg_temp_new_i64();
d0e372b0 5022 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
5023 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5024
5025 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
0ca0f872 5026 finalize_memop(s, dtype_mop[a->dtype]));
5027
5028 /* Broadcast to *all* elements. */
5029 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5030 vsz, vsz, temp);
5031 tcg_temp_free_i64(temp);
5032
5033 /* Zero the inactive elements. */
5034 gen_set_label(over);
60245996 5035 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
5036}
5037
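/*
 * The "no active elements" test above leans on pred_esz_masks[]: with
 * esz == MO_32 only every fourth predicate bit can mark an active element,
 * so (sketch, assuming the usual mask values)
 *
 *   pg & pred_esz_masks[2] & MAKE_64BIT_MASK(0, psz * 8)
 *
 * is zero exactly when no 32-bit element is active, and the branch to
 * 'over' skips the load while still zeroing the destination.
 */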
5038static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5039 int msz, int esz, int nreg)
5040{
5041 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5042 { { { gen_helper_sve_st1bb_r,
5043 gen_helper_sve_st1bh_r,
5044 gen_helper_sve_st1bs_r,
5045 gen_helper_sve_st1bd_r },
5046 { NULL,
5047 gen_helper_sve_st1hh_le_r,
5048 gen_helper_sve_st1hs_le_r,
5049 gen_helper_sve_st1hd_le_r },
5050 { NULL, NULL,
5051 gen_helper_sve_st1ss_le_r,
5052 gen_helper_sve_st1sd_le_r },
5053 { NULL, NULL, NULL,
5054 gen_helper_sve_st1dd_le_r } },
5055 { { gen_helper_sve_st1bb_r,
5056 gen_helper_sve_st1bh_r,
5057 gen_helper_sve_st1bs_r,
5058 gen_helper_sve_st1bd_r },
5059 { NULL,
5060 gen_helper_sve_st1hh_be_r,
5061 gen_helper_sve_st1hs_be_r,
5062 gen_helper_sve_st1hd_be_r },
5063 { NULL, NULL,
5064 gen_helper_sve_st1ss_be_r,
5065 gen_helper_sve_st1sd_be_r },
5066 { NULL, NULL, NULL,
5067 gen_helper_sve_st1dd_be_r } } },
5068
5069 { { { gen_helper_sve_st1bb_r_mte,
5070 gen_helper_sve_st1bh_r_mte,
5071 gen_helper_sve_st1bs_r_mte,
5072 gen_helper_sve_st1bd_r_mte },
5073 { NULL,
5074 gen_helper_sve_st1hh_le_r_mte,
5075 gen_helper_sve_st1hs_le_r_mte,
5076 gen_helper_sve_st1hd_le_r_mte },
5077 { NULL, NULL,
5078 gen_helper_sve_st1ss_le_r_mte,
5079 gen_helper_sve_st1sd_le_r_mte },
5080 { NULL, NULL, NULL,
5081 gen_helper_sve_st1dd_le_r_mte } },
5082 { { gen_helper_sve_st1bb_r_mte,
5083 gen_helper_sve_st1bh_r_mte,
5084 gen_helper_sve_st1bs_r_mte,
5085 gen_helper_sve_st1bd_r_mte },
5086 { NULL,
5087 gen_helper_sve_st1hh_be_r_mte,
5088 gen_helper_sve_st1hs_be_r_mte,
5089 gen_helper_sve_st1hd_be_r_mte },
5090 { NULL, NULL,
5091 gen_helper_sve_st1ss_be_r_mte,
5092 gen_helper_sve_st1sd_be_r_mte },
5093 { NULL, NULL, NULL,
5094 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5095 };
5096 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5097 { { { gen_helper_sve_st2bb_r,
5098 gen_helper_sve_st2hh_le_r,
5099 gen_helper_sve_st2ss_le_r,
5100 gen_helper_sve_st2dd_le_r },
5101 { gen_helper_sve_st3bb_r,
5102 gen_helper_sve_st3hh_le_r,
5103 gen_helper_sve_st3ss_le_r,
5104 gen_helper_sve_st3dd_le_r },
5105 { gen_helper_sve_st4bb_r,
5106 gen_helper_sve_st4hh_le_r,
5107 gen_helper_sve_st4ss_le_r,
5108 gen_helper_sve_st4dd_le_r } },
5109 { { gen_helper_sve_st2bb_r,
5110 gen_helper_sve_st2hh_be_r,
5111 gen_helper_sve_st2ss_be_r,
5112 gen_helper_sve_st2dd_be_r },
5113 { gen_helper_sve_st3bb_r,
5114 gen_helper_sve_st3hh_be_r,
5115 gen_helper_sve_st3ss_be_r,
5116 gen_helper_sve_st3dd_be_r },
5117 { gen_helper_sve_st4bb_r,
5118 gen_helper_sve_st4hh_be_r,
5119 gen_helper_sve_st4ss_be_r,
5120 gen_helper_sve_st4dd_be_r } } },
5121 { { { gen_helper_sve_st2bb_r_mte,
5122 gen_helper_sve_st2hh_le_r_mte,
5123 gen_helper_sve_st2ss_le_r_mte,
5124 gen_helper_sve_st2dd_le_r_mte },
5125 { gen_helper_sve_st3bb_r_mte,
5126 gen_helper_sve_st3hh_le_r_mte,
5127 gen_helper_sve_st3ss_le_r_mte,
5128 gen_helper_sve_st3dd_le_r_mte },
5129 { gen_helper_sve_st4bb_r_mte,
5130 gen_helper_sve_st4hh_le_r_mte,
5131 gen_helper_sve_st4ss_le_r_mte,
5132 gen_helper_sve_st4dd_le_r_mte } },
5133 { { gen_helper_sve_st2bb_r_mte,
5134 gen_helper_sve_st2hh_be_r_mte,
5135 gen_helper_sve_st2ss_be_r_mte,
5136 gen_helper_sve_st2dd_be_r_mte },
5137 { gen_helper_sve_st3bb_r_mte,
5138 gen_helper_sve_st3hh_be_r_mte,
5139 gen_helper_sve_st3ss_be_r_mte,
5140 gen_helper_sve_st3dd_be_r_mte },
5141 { gen_helper_sve_st4bb_r_mte,
5142 gen_helper_sve_st4hh_be_r_mte,
5143 gen_helper_sve_st4ss_be_r_mte,
5144 gen_helper_sve_st4dd_be_r_mte } } },
5145 };
5146 gen_helper_gvec_mem *fn;
28d57f2d 5147 int be = s->be_data == MO_BE;
5148
5149 if (nreg == 0) {
5150 /* ST1 */
71b9f394
RH
5151 fn = fn_single[s->mte_active[0]][be][msz][esz];
5152 nreg = 1;
1a039c7e
RH
5153 } else {
5154 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5155 assert(msz == esz);
71b9f394 5156 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5157 }
5158 assert(fn != NULL);
71b9f394 5159 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5160}
5161
3a7be554 5162static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5163{
5164 if (a->rm == 31 || a->msz > a->esz) {
5165 return false;
5166 }
5167 if (sve_access_check(s)) {
5168 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5169 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5170 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5171 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5172 }
5173 return true;
5174}
5175
3a7be554 5176static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5177{
5178 if (a->msz > a->esz) {
5179 return false;
5180 }
5181 if (sve_access_check(s)) {
5182 int vsz = vec_full_reg_size(s);
5183 int elements = vsz >> a->esz;
5184 TCGv_i64 addr = new_tmp_a64(s);
5185
5186 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5187 (a->imm * elements * (a->nreg + 1)) << a->msz);
5188 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5189 }
5190 return true;
5191}
f6dbf62a
RH
5192
5193/*
5194 *** SVE gather loads / scatter stores
5195 */
5196
500d0484 5197static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5198 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5199 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5200{
5201 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5202 TCGv_ptr t_zm = tcg_temp_new_ptr();
5203 TCGv_ptr t_pg = tcg_temp_new_ptr();
5204 TCGv_ptr t_zt = tcg_temp_new_ptr();
d28d12f0 5205 int desc = 0;
500d0484 5206
d28d12f0
RH
5207 if (s->mte_active[0]) {
5208 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5209 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5210 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5211 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5212 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
5213 desc <<= SVE_MTEDESC_SHIFT;
5214 }
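    /*
     * Note the descriptor layout (a reading of the code above and below):
     * the offset scale occupies the low bits of the simd_desc data field,
     * while any MTE metadata assembled above sits at or above
     * SVE_MTEDESC_SHIFT, so the helper can recover both independently.
     */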
cdecb3fc 5215 desc = simd_desc(vsz, vsz, desc | scale);
f6dbf62a
RH
5216
5217 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5218 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5219 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
c6a59b55 5220 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
f6dbf62a
RH
5221
5222 tcg_temp_free_ptr(t_zt);
5223 tcg_temp_free_ptr(t_zm);
5224 tcg_temp_free_ptr(t_pg);
f6dbf62a
RH
5225}
5226
d28d12f0
RH
5227/* Indexed by [mte][be][ff][xs][u][msz]. */
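/*
 * As used by the trans_* functions below:
 *   mte: s->mte_active[0], selects the tag-checking (_mte) helpers
 *   be:  s->be_data == MO_BE, big-endian data access
 *   ff:  first-fault form (LDFF1*)
 *   xs:  0 = zero-extended 32-bit offsets (_zsu), 1 = sign-extended (_zss)
 *   u:   0 = sign-extend the loaded element, 1 = zero-extend
 *   msz: log2 of the memory element size
 */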
5228static gen_helper_gvec_mem_scatter * const
5229gather_load_fn32[2][2][2][2][2][3] = {
5230 { /* MTE Inactive */
5231 { /* Little-endian */
5232 { { { gen_helper_sve_ldbss_zsu,
5233 gen_helper_sve_ldhss_le_zsu,
5234 NULL, },
5235 { gen_helper_sve_ldbsu_zsu,
5236 gen_helper_sve_ldhsu_le_zsu,
5237 gen_helper_sve_ldss_le_zsu, } },
5238 { { gen_helper_sve_ldbss_zss,
5239 gen_helper_sve_ldhss_le_zss,
5240 NULL, },
5241 { gen_helper_sve_ldbsu_zss,
5242 gen_helper_sve_ldhsu_le_zss,
5243 gen_helper_sve_ldss_le_zss, } } },
5244
5245 /* First-fault */
5246 { { { gen_helper_sve_ldffbss_zsu,
5247 gen_helper_sve_ldffhss_le_zsu,
5248 NULL, },
5249 { gen_helper_sve_ldffbsu_zsu,
5250 gen_helper_sve_ldffhsu_le_zsu,
5251 gen_helper_sve_ldffss_le_zsu, } },
5252 { { gen_helper_sve_ldffbss_zss,
5253 gen_helper_sve_ldffhss_le_zss,
5254 NULL, },
5255 { gen_helper_sve_ldffbsu_zss,
5256 gen_helper_sve_ldffhsu_le_zss,
5257 gen_helper_sve_ldffss_le_zss, } } } },
5258
5259 { /* Big-endian */
5260 { { { gen_helper_sve_ldbss_zsu,
5261 gen_helper_sve_ldhss_be_zsu,
5262 NULL, },
5263 { gen_helper_sve_ldbsu_zsu,
5264 gen_helper_sve_ldhsu_be_zsu,
5265 gen_helper_sve_ldss_be_zsu, } },
5266 { { gen_helper_sve_ldbss_zss,
5267 gen_helper_sve_ldhss_be_zss,
5268 NULL, },
5269 { gen_helper_sve_ldbsu_zss,
5270 gen_helper_sve_ldhsu_be_zss,
5271 gen_helper_sve_ldss_be_zss, } } },
5272
5273 /* First-fault */
5274 { { { gen_helper_sve_ldffbss_zsu,
5275 gen_helper_sve_ldffhss_be_zsu,
5276 NULL, },
5277 { gen_helper_sve_ldffbsu_zsu,
5278 gen_helper_sve_ldffhsu_be_zsu,
5279 gen_helper_sve_ldffss_be_zsu, } },
5280 { { gen_helper_sve_ldffbss_zss,
5281 gen_helper_sve_ldffhss_be_zss,
5282 NULL, },
5283 { gen_helper_sve_ldffbsu_zss,
5284 gen_helper_sve_ldffhsu_be_zss,
5285 gen_helper_sve_ldffss_be_zss, } } } } },
5286 { /* MTE Active */
5287 { /* Little-endian */
5288 { { { gen_helper_sve_ldbss_zsu_mte,
5289 gen_helper_sve_ldhss_le_zsu_mte,
5290 NULL, },
5291 { gen_helper_sve_ldbsu_zsu_mte,
5292 gen_helper_sve_ldhsu_le_zsu_mte,
5293 gen_helper_sve_ldss_le_zsu_mte, } },
5294 { { gen_helper_sve_ldbss_zss_mte,
5295 gen_helper_sve_ldhss_le_zss_mte,
5296 NULL, },
5297 { gen_helper_sve_ldbsu_zss_mte,
5298 gen_helper_sve_ldhsu_le_zss_mte,
5299 gen_helper_sve_ldss_le_zss_mte, } } },
5300
5301 /* First-fault */
5302 { { { gen_helper_sve_ldffbss_zsu_mte,
5303 gen_helper_sve_ldffhss_le_zsu_mte,
5304 NULL, },
5305 { gen_helper_sve_ldffbsu_zsu_mte,
5306 gen_helper_sve_ldffhsu_le_zsu_mte,
5307 gen_helper_sve_ldffss_le_zsu_mte, } },
5308 { { gen_helper_sve_ldffbss_zss_mte,
5309 gen_helper_sve_ldffhss_le_zss_mte,
5310 NULL, },
5311 { gen_helper_sve_ldffbsu_zss_mte,
5312 gen_helper_sve_ldffhsu_le_zss_mte,
5313 gen_helper_sve_ldffss_le_zss_mte, } } } },
5314
5315 { /* Big-endian */
5316 { { { gen_helper_sve_ldbss_zsu_mte,
5317 gen_helper_sve_ldhss_be_zsu_mte,
5318 NULL, },
5319 { gen_helper_sve_ldbsu_zsu_mte,
5320 gen_helper_sve_ldhsu_be_zsu_mte,
5321 gen_helper_sve_ldss_be_zsu_mte, } },
5322 { { gen_helper_sve_ldbss_zss_mte,
5323 gen_helper_sve_ldhss_be_zss_mte,
5324 NULL, },
5325 { gen_helper_sve_ldbsu_zss_mte,
5326 gen_helper_sve_ldhsu_be_zss_mte,
5327 gen_helper_sve_ldss_be_zss_mte, } } },
5328
5329 /* First-fault */
5330 { { { gen_helper_sve_ldffbss_zsu_mte,
5331 gen_helper_sve_ldffhss_be_zsu_mte,
5332 NULL, },
5333 { gen_helper_sve_ldffbsu_zsu_mte,
5334 gen_helper_sve_ldffhsu_be_zsu_mte,
5335 gen_helper_sve_ldffss_be_zsu_mte, } },
5336 { { gen_helper_sve_ldffbss_zss_mte,
5337 gen_helper_sve_ldffhss_be_zss_mte,
5338 NULL, },
5339 { gen_helper_sve_ldffbsu_zss_mte,
5340 gen_helper_sve_ldffhsu_be_zss_mte,
5341 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5342};
5343
5344/* Note that we overload xs=2 to indicate 64-bit offset. */
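/*
 * The instruction encoding only provides xs=0 (zero-extended) and xs=1
 * (sign-extended) 32-bit offsets; the translator reserves xs=2 to select
 * the _zd (64-bit offset) helpers, as done by trans_LD1_zpiz and
 * trans_LDNT1_zprz below.
 */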
d28d12f0
RH
5345static gen_helper_gvec_mem_scatter * const
5346gather_load_fn64[2][2][2][3][2][4] = {
5347 { /* MTE Inactive */
5348 { /* Little-endian */
5349 { { { gen_helper_sve_ldbds_zsu,
5350 gen_helper_sve_ldhds_le_zsu,
5351 gen_helper_sve_ldsds_le_zsu,
5352 NULL, },
5353 { gen_helper_sve_ldbdu_zsu,
5354 gen_helper_sve_ldhdu_le_zsu,
5355 gen_helper_sve_ldsdu_le_zsu,
5356 gen_helper_sve_lddd_le_zsu, } },
5357 { { gen_helper_sve_ldbds_zss,
5358 gen_helper_sve_ldhds_le_zss,
5359 gen_helper_sve_ldsds_le_zss,
5360 NULL, },
5361 { gen_helper_sve_ldbdu_zss,
5362 gen_helper_sve_ldhdu_le_zss,
5363 gen_helper_sve_ldsdu_le_zss,
5364 gen_helper_sve_lddd_le_zss, } },
5365 { { gen_helper_sve_ldbds_zd,
5366 gen_helper_sve_ldhds_le_zd,
5367 gen_helper_sve_ldsds_le_zd,
5368 NULL, },
5369 { gen_helper_sve_ldbdu_zd,
5370 gen_helper_sve_ldhdu_le_zd,
5371 gen_helper_sve_ldsdu_le_zd,
5372 gen_helper_sve_lddd_le_zd, } } },
5373
5374 /* First-fault */
5375 { { { gen_helper_sve_ldffbds_zsu,
5376 gen_helper_sve_ldffhds_le_zsu,
5377 gen_helper_sve_ldffsds_le_zsu,
5378 NULL, },
5379 { gen_helper_sve_ldffbdu_zsu,
5380 gen_helper_sve_ldffhdu_le_zsu,
5381 gen_helper_sve_ldffsdu_le_zsu,
5382 gen_helper_sve_ldffdd_le_zsu, } },
5383 { { gen_helper_sve_ldffbds_zss,
5384 gen_helper_sve_ldffhds_le_zss,
5385 gen_helper_sve_ldffsds_le_zss,
5386 NULL, },
5387 { gen_helper_sve_ldffbdu_zss,
5388 gen_helper_sve_ldffhdu_le_zss,
5389 gen_helper_sve_ldffsdu_le_zss,
5390 gen_helper_sve_ldffdd_le_zss, } },
5391 { { gen_helper_sve_ldffbds_zd,
5392 gen_helper_sve_ldffhds_le_zd,
5393 gen_helper_sve_ldffsds_le_zd,
5394 NULL, },
5395 { gen_helper_sve_ldffbdu_zd,
5396 gen_helper_sve_ldffhdu_le_zd,
5397 gen_helper_sve_ldffsdu_le_zd,
5398 gen_helper_sve_ldffdd_le_zd, } } } },
5399 { /* Big-endian */
5400 { { { gen_helper_sve_ldbds_zsu,
5401 gen_helper_sve_ldhds_be_zsu,
5402 gen_helper_sve_ldsds_be_zsu,
5403 NULL, },
5404 { gen_helper_sve_ldbdu_zsu,
5405 gen_helper_sve_ldhdu_be_zsu,
5406 gen_helper_sve_ldsdu_be_zsu,
5407 gen_helper_sve_lddd_be_zsu, } },
5408 { { gen_helper_sve_ldbds_zss,
5409 gen_helper_sve_ldhds_be_zss,
5410 gen_helper_sve_ldsds_be_zss,
5411 NULL, },
5412 { gen_helper_sve_ldbdu_zss,
5413 gen_helper_sve_ldhdu_be_zss,
5414 gen_helper_sve_ldsdu_be_zss,
5415 gen_helper_sve_lddd_be_zss, } },
5416 { { gen_helper_sve_ldbds_zd,
5417 gen_helper_sve_ldhds_be_zd,
5418 gen_helper_sve_ldsds_be_zd,
5419 NULL, },
5420 { gen_helper_sve_ldbdu_zd,
5421 gen_helper_sve_ldhdu_be_zd,
5422 gen_helper_sve_ldsdu_be_zd,
5423 gen_helper_sve_lddd_be_zd, } } },
5424
5425 /* First-fault */
5426 { { { gen_helper_sve_ldffbds_zsu,
5427 gen_helper_sve_ldffhds_be_zsu,
5428 gen_helper_sve_ldffsds_be_zsu,
5429 NULL, },
5430 { gen_helper_sve_ldffbdu_zsu,
5431 gen_helper_sve_ldffhdu_be_zsu,
5432 gen_helper_sve_ldffsdu_be_zsu,
5433 gen_helper_sve_ldffdd_be_zsu, } },
5434 { { gen_helper_sve_ldffbds_zss,
5435 gen_helper_sve_ldffhds_be_zss,
5436 gen_helper_sve_ldffsds_be_zss,
5437 NULL, },
5438 { gen_helper_sve_ldffbdu_zss,
5439 gen_helper_sve_ldffhdu_be_zss,
5440 gen_helper_sve_ldffsdu_be_zss,
5441 gen_helper_sve_ldffdd_be_zss, } },
5442 { { gen_helper_sve_ldffbds_zd,
5443 gen_helper_sve_ldffhds_be_zd,
5444 gen_helper_sve_ldffsds_be_zd,
5445 NULL, },
5446 { gen_helper_sve_ldffbdu_zd,
5447 gen_helper_sve_ldffhdu_be_zd,
5448 gen_helper_sve_ldffsdu_be_zd,
5449 gen_helper_sve_ldffdd_be_zd, } } } } },
5450 { /* MTE Active */
5451 { /* Little-endian */
5452 { { { gen_helper_sve_ldbds_zsu_mte,
5453 gen_helper_sve_ldhds_le_zsu_mte,
5454 gen_helper_sve_ldsds_le_zsu_mte,
5455 NULL, },
5456 { gen_helper_sve_ldbdu_zsu_mte,
5457 gen_helper_sve_ldhdu_le_zsu_mte,
5458 gen_helper_sve_ldsdu_le_zsu_mte,
5459 gen_helper_sve_lddd_le_zsu_mte, } },
5460 { { gen_helper_sve_ldbds_zss_mte,
5461 gen_helper_sve_ldhds_le_zss_mte,
5462 gen_helper_sve_ldsds_le_zss_mte,
5463 NULL, },
5464 { gen_helper_sve_ldbdu_zss_mte,
5465 gen_helper_sve_ldhdu_le_zss_mte,
5466 gen_helper_sve_ldsdu_le_zss_mte,
5467 gen_helper_sve_lddd_le_zss_mte, } },
5468 { { gen_helper_sve_ldbds_zd_mte,
5469 gen_helper_sve_ldhds_le_zd_mte,
5470 gen_helper_sve_ldsds_le_zd_mte,
5471 NULL, },
5472 { gen_helper_sve_ldbdu_zd_mte,
5473 gen_helper_sve_ldhdu_le_zd_mte,
5474 gen_helper_sve_ldsdu_le_zd_mte,
5475 gen_helper_sve_lddd_le_zd_mte, } } },
5476
5477 /* First-fault */
5478 { { { gen_helper_sve_ldffbds_zsu_mte,
5479 gen_helper_sve_ldffhds_le_zsu_mte,
5480 gen_helper_sve_ldffsds_le_zsu_mte,
5481 NULL, },
5482 { gen_helper_sve_ldffbdu_zsu_mte,
5483 gen_helper_sve_ldffhdu_le_zsu_mte,
5484 gen_helper_sve_ldffsdu_le_zsu_mte,
5485 gen_helper_sve_ldffdd_le_zsu_mte, } },
5486 { { gen_helper_sve_ldffbds_zss_mte,
5487 gen_helper_sve_ldffhds_le_zss_mte,
5488 gen_helper_sve_ldffsds_le_zss_mte,
5489 NULL, },
5490 { gen_helper_sve_ldffbdu_zss_mte,
5491 gen_helper_sve_ldffhdu_le_zss_mte,
5492 gen_helper_sve_ldffsdu_le_zss_mte,
5493 gen_helper_sve_ldffdd_le_zss_mte, } },
5494 { { gen_helper_sve_ldffbds_zd_mte,
5495 gen_helper_sve_ldffhds_le_zd_mte,
5496 gen_helper_sve_ldffsds_le_zd_mte,
5497 NULL, },
5498 { gen_helper_sve_ldffbdu_zd_mte,
5499 gen_helper_sve_ldffhdu_le_zd_mte,
5500 gen_helper_sve_ldffsdu_le_zd_mte,
5501 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5502 { /* Big-endian */
5503 { { { gen_helper_sve_ldbds_zsu_mte,
5504 gen_helper_sve_ldhds_be_zsu_mte,
5505 gen_helper_sve_ldsds_be_zsu_mte,
5506 NULL, },
5507 { gen_helper_sve_ldbdu_zsu_mte,
5508 gen_helper_sve_ldhdu_be_zsu_mte,
5509 gen_helper_sve_ldsdu_be_zsu_mte,
5510 gen_helper_sve_lddd_be_zsu_mte, } },
5511 { { gen_helper_sve_ldbds_zss_mte,
5512 gen_helper_sve_ldhds_be_zss_mte,
5513 gen_helper_sve_ldsds_be_zss_mte,
5514 NULL, },
5515 { gen_helper_sve_ldbdu_zss_mte,
5516 gen_helper_sve_ldhdu_be_zss_mte,
5517 gen_helper_sve_ldsdu_be_zss_mte,
5518 gen_helper_sve_lddd_be_zss_mte, } },
5519 { { gen_helper_sve_ldbds_zd_mte,
5520 gen_helper_sve_ldhds_be_zd_mte,
5521 gen_helper_sve_ldsds_be_zd_mte,
5522 NULL, },
5523 { gen_helper_sve_ldbdu_zd_mte,
5524 gen_helper_sve_ldhdu_be_zd_mte,
5525 gen_helper_sve_ldsdu_be_zd_mte,
5526 gen_helper_sve_lddd_be_zd_mte, } } },
5527
5528 /* First-fault */
5529 { { { gen_helper_sve_ldffbds_zsu_mte,
5530 gen_helper_sve_ldffhds_be_zsu_mte,
5531 gen_helper_sve_ldffsds_be_zsu_mte,
5532 NULL, },
5533 { gen_helper_sve_ldffbdu_zsu_mte,
5534 gen_helper_sve_ldffhdu_be_zsu_mte,
5535 gen_helper_sve_ldffsdu_be_zsu_mte,
5536 gen_helper_sve_ldffdd_be_zsu_mte, } },
5537 { { gen_helper_sve_ldffbds_zss_mte,
5538 gen_helper_sve_ldffhds_be_zss_mte,
5539 gen_helper_sve_ldffsds_be_zss_mte,
5540 NULL, },
5541 { gen_helper_sve_ldffbdu_zss_mte,
5542 gen_helper_sve_ldffhdu_be_zss_mte,
5543 gen_helper_sve_ldffsdu_be_zss_mte,
5544 gen_helper_sve_ldffdd_be_zss_mte, } },
5545 { { gen_helper_sve_ldffbds_zd_mte,
5546 gen_helper_sve_ldffhds_be_zd_mte,
5547 gen_helper_sve_ldffsds_be_zd_mte,
5548 NULL, },
5549 { gen_helper_sve_ldffbdu_zd_mte,
5550 gen_helper_sve_ldffhdu_be_zd_mte,
5551 gen_helper_sve_ldffsdu_be_zd_mte,
5552 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
5553};
5554
3a7be554 5555static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5556{
5557 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5558 bool be = s->be_data == MO_BE;
5559 bool mte = s->mte_active[0];
673e9fa6
RH
5560
5561 if (!sve_access_check(s)) {
5562 return true;
5563 }
5564
5565 switch (a->esz) {
5566 case MO_32:
d28d12f0 5567 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5568 break;
5569 case MO_64:
d28d12f0 5570 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5571 break;
5572 }
5573 assert(fn != NULL);
5574
5575 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5576 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5577 return true;
5578}
5579
3a7be554 5580static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5581{
5582 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5583 bool be = s->be_data == MO_BE;
5584 bool mte = s->mte_active[0];
673e9fa6
RH
5585
5586 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5587 return false;
5588 }
5589 if (!sve_access_check(s)) {
5590 return true;
5591 }
5592
5593 switch (a->esz) {
5594 case MO_32:
d28d12f0 5595 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5596 break;
5597 case MO_64:
d28d12f0 5598 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5599 break;
5600 }
5601 assert(fn != NULL);
5602
5603 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5604 * by loading the immediate into the scalar parameter.
5605 */
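    /*
     * For example, with msz == 3 an immediate field of 5 is passed to the
     * helper as tcg_constant_i64(5 << 3) == 40, i.e. a fixed byte offset
     * applied to every active element of the vector base.
     */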
2ccdf94f
RH
5606 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5607 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
5608 return true;
5609}
5610
cf327449
SL
5611static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
5612{
b17ab470
RH
5613 gen_helper_gvec_mem_scatter *fn = NULL;
5614 bool be = s->be_data == MO_BE;
5615 bool mte = s->mte_active[0];
5616
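    /*
     * This check is equivalent to (esz < msz || (esz == msz && !u)):
     * the memory size may not exceed the element size, and a
     * sign-extending load additionally requires msz < esz.
     */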
5617 if (a->esz < a->msz + !a->u) {
5618 return false;
5619 }
cf327449
SL
5620 if (!dc_isar_feature(aa64_sve2, s)) {
5621 return false;
5622 }
b17ab470
RH
5623 if (!sve_access_check(s)) {
5624 return true;
5625 }
5626
5627 switch (a->esz) {
5628 case MO_32:
5629 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
5630 break;
5631 case MO_64:
5632 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
5633 break;
5634 }
5635 assert(fn != NULL);
5636
5637 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5638 cpu_reg(s, a->rm), a->msz, false, fn);
5639 return true;
cf327449
SL
5640}
5641
d28d12f0
RH
5642/* Indexed by [mte][be][xs][msz]. */
5643static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5644 { /* MTE Inactive */
5645 { /* Little-endian */
5646 { gen_helper_sve_stbs_zsu,
5647 gen_helper_sve_sths_le_zsu,
5648 gen_helper_sve_stss_le_zsu, },
5649 { gen_helper_sve_stbs_zss,
5650 gen_helper_sve_sths_le_zss,
5651 gen_helper_sve_stss_le_zss, } },
5652 { /* Big-endian */
5653 { gen_helper_sve_stbs_zsu,
5654 gen_helper_sve_sths_be_zsu,
5655 gen_helper_sve_stss_be_zsu, },
5656 { gen_helper_sve_stbs_zss,
5657 gen_helper_sve_sths_be_zss,
5658 gen_helper_sve_stss_be_zss, } } },
5659 { /* MTE Active */
5660 { /* Little-endian */
5661 { gen_helper_sve_stbs_zsu_mte,
5662 gen_helper_sve_sths_le_zsu_mte,
5663 gen_helper_sve_stss_le_zsu_mte, },
5664 { gen_helper_sve_stbs_zss_mte,
5665 gen_helper_sve_sths_le_zss_mte,
5666 gen_helper_sve_stss_le_zss_mte, } },
5667 { /* Big-endian */
5668 { gen_helper_sve_stbs_zsu_mte,
5669 gen_helper_sve_sths_be_zsu_mte,
5670 gen_helper_sve_stss_be_zsu_mte, },
5671 { gen_helper_sve_stbs_zss_mte,
5672 gen_helper_sve_sths_be_zss_mte,
5673 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
5674};
5675
5676/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5677static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5678 { /* MTE Inactive */
5679 { /* Little-endian */
5680 { gen_helper_sve_stbd_zsu,
5681 gen_helper_sve_sthd_le_zsu,
5682 gen_helper_sve_stsd_le_zsu,
5683 gen_helper_sve_stdd_le_zsu, },
5684 { gen_helper_sve_stbd_zss,
5685 gen_helper_sve_sthd_le_zss,
5686 gen_helper_sve_stsd_le_zss,
5687 gen_helper_sve_stdd_le_zss, },
5688 { gen_helper_sve_stbd_zd,
5689 gen_helper_sve_sthd_le_zd,
5690 gen_helper_sve_stsd_le_zd,
5691 gen_helper_sve_stdd_le_zd, } },
5692 { /* Big-endian */
5693 { gen_helper_sve_stbd_zsu,
5694 gen_helper_sve_sthd_be_zsu,
5695 gen_helper_sve_stsd_be_zsu,
5696 gen_helper_sve_stdd_be_zsu, },
5697 { gen_helper_sve_stbd_zss,
5698 gen_helper_sve_sthd_be_zss,
5699 gen_helper_sve_stsd_be_zss,
5700 gen_helper_sve_stdd_be_zss, },
5701 { gen_helper_sve_stbd_zd,
5702 gen_helper_sve_sthd_be_zd,
5703 gen_helper_sve_stsd_be_zd,
5704 gen_helper_sve_stdd_be_zd, } } },
5705 { /* MTE Active */
5706 { /* Little-endian */
5707 { gen_helper_sve_stbd_zsu_mte,
5708 gen_helper_sve_sthd_le_zsu_mte,
5709 gen_helper_sve_stsd_le_zsu_mte,
5710 gen_helper_sve_stdd_le_zsu_mte, },
5711 { gen_helper_sve_stbd_zss_mte,
5712 gen_helper_sve_sthd_le_zss_mte,
5713 gen_helper_sve_stsd_le_zss_mte,
5714 gen_helper_sve_stdd_le_zss_mte, },
5715 { gen_helper_sve_stbd_zd_mte,
5716 gen_helper_sve_sthd_le_zd_mte,
5717 gen_helper_sve_stsd_le_zd_mte,
5718 gen_helper_sve_stdd_le_zd_mte, } },
5719 { /* Big-endian */
5720 { gen_helper_sve_stbd_zsu_mte,
5721 gen_helper_sve_sthd_be_zsu_mte,
5722 gen_helper_sve_stsd_be_zsu_mte,
5723 gen_helper_sve_stdd_be_zsu_mte, },
5724 { gen_helper_sve_stbd_zss_mte,
5725 gen_helper_sve_sthd_be_zss_mte,
5726 gen_helper_sve_stsd_be_zss_mte,
5727 gen_helper_sve_stdd_be_zss_mte, },
5728 { gen_helper_sve_stbd_zd_mte,
5729 gen_helper_sve_sthd_be_zd_mte,
5730 gen_helper_sve_stsd_be_zd_mte,
5731 gen_helper_sve_stdd_be_zd_mte, } } },
408ecde9
RH
5732};
5733
3a7be554 5734static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 5735{
f6dbf62a 5736 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
5737 bool be = s->be_data == MO_BE;
5738 bool mte = s->mte_active[0];
f6dbf62a
RH
5739
5740 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5741 return false;
5742 }
5743 if (!sve_access_check(s)) {
5744 return true;
5745 }
5746 switch (a->esz) {
5747 case MO_32:
d28d12f0 5748 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
5749 break;
5750 case MO_64:
d28d12f0 5751 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
5752 break;
5753 default:
5754 g_assert_not_reached();
5755 }
5756 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5757 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
5758 return true;
5759}
dec6cf6b 5760
3a7be554 5761static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
5762{
5763 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5764 bool be = s->be_data == MO_BE;
5765 bool mte = s->mte_active[0];
408ecde9
RH
5766
5767 if (a->esz < a->msz) {
5768 return false;
5769 }
5770 if (!sve_access_check(s)) {
5771 return true;
5772 }
5773
5774 switch (a->esz) {
5775 case MO_32:
d28d12f0 5776 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
5777 break;
5778 case MO_64:
d28d12f0 5779 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
5780 break;
5781 }
5782 assert(fn != NULL);
5783
5784 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5785 * by loading the immediate into the scalar parameter.
5786 */
2ccdf94f
RH
5787 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5788 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
5789 return true;
5790}
5791
6ebca45f
SL
5792static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
5793{
b17ab470
RH
5794 gen_helper_gvec_mem_scatter *fn;
5795 bool be = s->be_data == MO_BE;
5796 bool mte = s->mte_active[0];
5797
5798 if (a->esz < a->msz) {
5799 return false;
5800 }
6ebca45f
SL
5801 if (!dc_isar_feature(aa64_sve2, s)) {
5802 return false;
5803 }
b17ab470
RH
5804 if (!sve_access_check(s)) {
5805 return true;
5806 }
5807
5808 switch (a->esz) {
5809 case MO_32:
5810 fn = scatter_store_fn32[mte][be][0][a->msz];
5811 break;
5812 case MO_64:
5813 fn = scatter_store_fn64[mte][be][2][a->msz];
5814 break;
5815 default:
5816 g_assert_not_reached();
5817 }
5818
5819 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5820 cpu_reg(s, a->rm), a->msz, true, fn);
5821 return true;
6ebca45f
SL
5822}
5823
dec6cf6b
RH
5824/*
5825 * Prefetches
5826 */
5827
3a7be554 5828static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
5829{
5830 /* Prefetch is a nop within QEMU. */
2f95a3b0 5831 (void)sve_access_check(s);
dec6cf6b
RH
5832 return true;
5833}
5834
3a7be554 5835static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
5836{
5837 if (a->rm == 31) {
5838 return false;
5839 }
5840 /* Prefetch is a nop within QEMU. */
2f95a3b0 5841 (void)sve_access_check(s);
dec6cf6b
RH
5842 return true;
5843}
a2103582
RH
5844
5845/*
5846 * Move Prefix
5847 *
5848 * TODO: The implementation so far could handle predicated merging movprfx.
5849 * The helper functions as written take an extra source register to
5850 * use in the operation, but the result is only written when predication
5851 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5852 * to allow the final write back to the destination to be unconditional.
5853 * For predicated zeroing movprfx, we need to rearrange the helpers to
5854 * allow the final write back to zero inactives.
5855 *
5856 * In the meantime, just emit the moves.
5857 */
5858
4b0b37e9
RH
5859TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
5860TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
5861TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
5dad1ba5
RH
5862
5863/*
5864 * SVE2 Integer Multiply - Unpredicated
5865 */
5866
b262215b 5867TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
5dad1ba5 5868
bd394cf5
RH
5869static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
5870 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
5871 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
5872};
5873TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5874 smulh_zzz_fns[a->esz], a, 0)
5dad1ba5 5875
bd394cf5
RH
5876static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
5877 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
5878 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
5879};
5880TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5881 umulh_zzz_fns[a->esz], a, 0)
5dad1ba5 5882
bd394cf5
RH
5883TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5884 gen_helper_gvec_pmul_b, a, 0)
5dad1ba5 5885
bd394cf5
RH
5886static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
5887 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
5888 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
5889};
5890TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5891 sqdmulh_zzz_fns[a->esz], a, 0)
169d7c58 5892
bd394cf5
RH
5893static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
5894 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
5895 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
5896};
5897TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5898 sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 5899
d4b1e59d
RH
5900/*
5901 * SVE2 Integer - Predicated
5902 */
5903
5880bdc0
RH
5904static gen_helper_gvec_4 * const sadlp_fns[4] = {
5905 NULL, gen_helper_sve2_sadalp_zpzz_h,
5906 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
5907};
5908TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5909 sadlp_fns[a->esz], a, 0)
d4b1e59d 5910
5880bdc0
RH
5911static gen_helper_gvec_4 * const uadlp_fns[4] = {
5912 NULL, gen_helper_sve2_uadalp_zpzz_h,
5913 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
5914};
5915TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5916 uadlp_fns[a->esz], a, 0)
db366da8
RH
5917
5918/*
5919 * SVE2 integer unary operations (predicated)
5920 */
5921
b2c00961
RH
5922TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
5923 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)
db366da8 5924
b2c00961
RH
5925TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
5926 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
db366da8 5927
b2c00961
RH
5928static gen_helper_gvec_3 * const sqabs_fns[4] = {
5929 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
5930 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
5931};
5932TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)
db366da8 5933
b2c00961
RH
5934static gen_helper_gvec_3 * const sqneg_fns[4] = {
5935 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
5936 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
5937};
5938TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
45d9503d 5939
5880bdc0
RH
5940DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
5941DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
5942DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)
45d9503d 5943
5880bdc0
RH
5944DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
5945DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
5946DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)
a47dc220 5947
5880bdc0
RH
5948DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
5949DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
5950DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)
a47dc220 5951
5880bdc0
RH
5952DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
5953DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
5954DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)
8597dc8b 5955
5880bdc0
RH
5956DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
5957DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
5958DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
5959DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
5960DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)
4f07fbeb 5961
5880bdc0
RH
5962DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
5963DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
5964DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
5965DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
5966DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
5967DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
0ce1dda8
RH
5968
5969/*
5970 * SVE2 Widening Integer Arithmetic
5971 */
5972
615f19fe
RH
5973static gen_helper_gvec_3 * const saddl_fns[4] = {
5974 NULL, gen_helper_sve2_saddl_h,
5975 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
5976};
5977TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5978 saddl_fns[a->esz], a, 0)
5979TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5980 saddl_fns[a->esz], a, 3)
5981TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
5982 saddl_fns[a->esz], a, 2)
5983
5984static gen_helper_gvec_3 * const ssubl_fns[4] = {
5985 NULL, gen_helper_sve2_ssubl_h,
5986 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
5987};
5988TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5989 ssubl_fns[a->esz], a, 0)
5990TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5991 ssubl_fns[a->esz], a, 3)
5992TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
5993 ssubl_fns[a->esz], a, 2)
5994TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
5995 ssubl_fns[a->esz], a, 1)
5996
5997static gen_helper_gvec_3 * const sabdl_fns[4] = {
5998 NULL, gen_helper_sve2_sabdl_h,
5999 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
6000};
6001TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6002 sabdl_fns[a->esz], a, 0)
6003TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6004 sabdl_fns[a->esz], a, 3)
6005
6006static gen_helper_gvec_3 * const uaddl_fns[4] = {
6007 NULL, gen_helper_sve2_uaddl_h,
6008 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
6009};
6010TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6011 uaddl_fns[a->esz], a, 0)
6012TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6013 uaddl_fns[a->esz], a, 3)
6014
6015static gen_helper_gvec_3 * const usubl_fns[4] = {
6016 NULL, gen_helper_sve2_usubl_h,
6017 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
6018};
6019TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6020 usubl_fns[a->esz], a, 0)
6021TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6022 usubl_fns[a->esz], a, 3)
6023
6024static gen_helper_gvec_3 * const uabdl_fns[4] = {
6025 NULL, gen_helper_sve2_uabdl_h,
6026 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
6027};
6028TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6029 uabdl_fns[a->esz], a, 0)
6030TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6031 uabdl_fns[a->esz], a, 3)
6032
6033static gen_helper_gvec_3 * const sqdmull_fns[4] = {
6034 NULL, gen_helper_sve2_sqdmull_zzz_h,
6035 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
6036};
6037TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6038 sqdmull_fns[a->esz], a, 0)
6039TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6040 sqdmull_fns[a->esz], a, 3)
6041
6042static gen_helper_gvec_3 * const smull_fns[4] = {
6043 NULL, gen_helper_sve2_smull_zzz_h,
6044 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
6045};
6046TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6047 smull_fns[a->esz], a, 0)
6048TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6049 smull_fns[a->esz], a, 3)
6050
6051static gen_helper_gvec_3 * const umull_fns[4] = {
6052 NULL, gen_helper_sve2_umull_zzz_h,
6053 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
6054};
6055TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6056 umull_fns[a->esz], a, 0)
6057TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6058 umull_fns[a->esz], a, 3)
6059
6060static gen_helper_gvec_3 * const eoril_fns[4] = {
6061 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6062 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6063};
6064TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
6065TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
2df3ca55 6066
e3a56131
RH
6067static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6068{
6069 static gen_helper_gvec_3 * const fns[4] = {
6070 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6071 NULL, gen_helper_sve2_pmull_d,
6072 };
6073 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
6074 return false;
6075 }
615f19fe 6076 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
e3a56131
RH
6077}
6078
615f19fe
RH
6079TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
6080TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
e3a56131 6081
615f19fe
RH
6082static gen_helper_gvec_3 * const saddw_fns[4] = {
6083 NULL, gen_helper_sve2_saddw_h,
6084 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
6085};
6086TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
6087TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
e3a56131 6088
615f19fe
RH
6089static gen_helper_gvec_3 * const ssubw_fns[4] = {
6090 NULL, gen_helper_sve2_ssubw_h,
6091 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
6092};
6093TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
6094TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
81fccf09 6095
615f19fe
RH
6096static gen_helper_gvec_3 * const uaddw_fns[4] = {
6097 NULL, gen_helper_sve2_uaddw_h,
6098 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
6099};
6100TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
6101TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
81fccf09 6102
615f19fe
RH
6103static gen_helper_gvec_3 * const usubw_fns[4] = {
6104 NULL, gen_helper_sve2_usubw_h,
6105 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
6106};
6107TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
6108TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
4269fef1
RH
6109
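/*
 * Signed shift-left-long: imm packs (shl << 1) | top.  For the top ('T')
 * form, the high half of each double-width element is sign-extended with
 * an arithmetic shift right by halfbits and then shifted left by shl
 * (shl == halfbits degenerates to masking the high halves).  For the
 * bottom ('B') form, the low half is moved to the top with a left shift
 * and then arithmetic-shifted back down by halfbits - shl.
 */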
6110static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6111{
6112 int top = imm & 1;
6113 int shl = imm >> 1;
6114 int halfbits = 4 << vece;
6115
6116 if (top) {
6117 if (shl == halfbits) {
6118 TCGv_vec t = tcg_temp_new_vec_matching(d);
6119 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6120 tcg_gen_and_vec(vece, d, n, t);
6121 tcg_temp_free_vec(t);
6122 } else {
6123 tcg_gen_sari_vec(vece, d, n, halfbits);
6124 tcg_gen_shli_vec(vece, d, d, shl);
6125 }
6126 } else {
6127 tcg_gen_shli_vec(vece, d, n, halfbits);
6128 tcg_gen_sari_vec(vece, d, d, halfbits - shl);
6129 }
6130}
6131
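/*
 * Unsigned variant operating on 64-bit lanes: the selected half of each
 * double-width element is moved to bit position shl (shifting right when
 * the top half must move down), then masked to zero-extend the result.
 */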
6132static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
6133{
6134 int halfbits = 4 << vece;
6135 int top = imm & 1;
6136 int shl = (imm >> 1);
6137 int shift;
6138 uint64_t mask;
6139
6140 mask = MAKE_64BIT_MASK(0, halfbits);
6141 mask <<= shl;
6142 mask = dup_const(vece, mask);
6143
6144 shift = shl - top * halfbits;
6145 if (shift < 0) {
6146 tcg_gen_shri_i64(d, n, -shift);
6147 } else {
6148 tcg_gen_shli_i64(d, n, shift);
6149 }
6150 tcg_gen_andi_i64(d, d, mask);
6151}
6152
6153static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6154{
6155 gen_ushll_i64(MO_16, d, n, imm);
6156}
6157
6158static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6159{
6160 gen_ushll_i64(MO_32, d, n, imm);
6161}
6162
6163static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6164{
6165 gen_ushll_i64(MO_64, d, n, imm);
6166}
6167
6168static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6169{
6170 int halfbits = 4 << vece;
6171 int top = imm & 1;
6172 int shl = imm >> 1;
6173
6174 if (top) {
6175 if (shl == halfbits) {
6176 TCGv_vec t = tcg_temp_new_vec_matching(d);
6177 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6178 tcg_gen_and_vec(vece, d, n, t);
6179 tcg_temp_free_vec(t);
6180 } else {
6181 tcg_gen_shri_vec(vece, d, n, halfbits);
6182 tcg_gen_shli_vec(vece, d, d, shl);
6183 }
6184 } else {
6185 if (shl == 0) {
6186 TCGv_vec t = tcg_temp_new_vec_matching(d);
6187 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6188 tcg_gen_and_vec(vece, d, n, t);
6189 tcg_temp_free_vec(t);
6190 } else {
6191 tcg_gen_shli_vec(vece, d, n, halfbits);
6192 tcg_gen_shri_vec(vece, d, d, halfbits - shl);
6193 }
6194 }
6195}
6196
5a528bb5
RH
6197static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
6198 const GVecGen2i ops[3], bool sel)
4269fef1 6199{
4269fef1 6200
5a528bb5 6201 if (a->esz < 0 || a->esz > 2) {
4269fef1
RH
6202 return false;
6203 }
6204 if (sve_access_check(s)) {
6205 unsigned vsz = vec_full_reg_size(s);
6206 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6207 vec_full_reg_offset(s, a->rn),
6208 vsz, vsz, (a->imm << 1) | sel,
5a528bb5 6209 &ops[a->esz]);
4269fef1
RH
6210 }
6211 return true;
6212}
6213
5a528bb5
RH
6214static const TCGOpcode sshll_list[] = {
6215 INDEX_op_shli_vec, INDEX_op_sari_vec, 0
6216};
6217static const GVecGen2i sshll_ops[3] = {
6218 { .fniv = gen_sshll_vec,
6219 .opt_opc = sshll_list,
6220 .fno = gen_helper_sve2_sshll_h,
6221 .vece = MO_16 },
6222 { .fniv = gen_sshll_vec,
6223 .opt_opc = sshll_list,
6224 .fno = gen_helper_sve2_sshll_s,
6225 .vece = MO_32 },
6226 { .fniv = gen_sshll_vec,
6227 .opt_opc = sshll_list,
6228 .fno = gen_helper_sve2_sshll_d,
6229 .vece = MO_64 }
6230};
6231TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
6232TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)
4269fef1 6233
5a528bb5
RH
6234static const TCGOpcode ushll_list[] = {
6235 INDEX_op_shli_vec, INDEX_op_shri_vec, 0
6236};
6237static const GVecGen2i ushll_ops[3] = {
6238 { .fni8 = gen_ushll16_i64,
6239 .fniv = gen_ushll_vec,
6240 .opt_opc = ushll_list,
6241 .fno = gen_helper_sve2_ushll_h,
6242 .vece = MO_16 },
6243 { .fni8 = gen_ushll32_i64,
6244 .fniv = gen_ushll_vec,
6245 .opt_opc = ushll_list,
6246 .fno = gen_helper_sve2_ushll_s,
6247 .vece = MO_32 },
6248 { .fni8 = gen_ushll64_i64,
6249 .fniv = gen_ushll_vec,
6250 .opt_opc = ushll_list,
6251 .fno = gen_helper_sve2_ushll_d,
6252 .vece = MO_64 },
6253};
6254TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
6255TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)
cb9c33b8 6256
615f19fe
RH
6257static gen_helper_gvec_3 * const bext_fns[4] = {
6258 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
6259 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
6260};
6261TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6262 bext_fns[a->esz], a, 0)
ed4a6387 6263
615f19fe
RH
6264static gen_helper_gvec_3 * const bdep_fns[4] = {
6265 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
6266 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
6267};
6268TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6269 bdep_fns[a->esz], a, 0)
ed4a6387 6270
615f19fe
RH
6271static gen_helper_gvec_3 * const bgrp_fns[4] = {
6272 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
6273 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
6274};
6275TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6276 bgrp_fns[a->esz], a, 0)
ed4a6387 6277
615f19fe
RH
6278static gen_helper_gvec_3 * const cadd_fns[4] = {
6279 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
6280 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
6281};
6282TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6283 cadd_fns[a->esz], a, 0)
6284TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6285 cadd_fns[a->esz], a, 1)
6286
6287static gen_helper_gvec_3 * const sqcadd_fns[4] = {
6288 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
6289 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
6290};
6291TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6292 sqcadd_fns[a->esz], a, 0)
6293TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6294 sqcadd_fns[a->esz], a, 1)
38650638 6295
eeb4e84d
RH
6296static gen_helper_gvec_4 * const sabal_fns[4] = {
6297 NULL, gen_helper_sve2_sabal_h,
6298 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
6299};
6300TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
6301TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
38650638 6302
eeb4e84d
RH
6303static gen_helper_gvec_4 * const uabal_fns[4] = {
6304 NULL, gen_helper_sve2_uabal_h,
6305 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
6306};
6307TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
6308TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
b8295dfb
RH
6309
6310static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
6311{
6312 static gen_helper_gvec_4 * const fns[2] = {
6313 gen_helper_sve2_adcl_s,
6314 gen_helper_sve2_adcl_d,
6315 };
6316 /*
6317 * Note that in this case the ESZ field encodes both size and sign.
6318 * Split out 'subtract' into bit 1 of the data field for the helper.
6319 */
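    /*
     * That is: bit 0 of ESZ selects the 32-bit vs 64-bit helper, bit 1
     * of ESZ (forwarded as bit 1 of the data) distinguishes the add from
     * the subtracting (SBCL*) forms, and 'sel' in bit 0 of the data
     * picks the bottom/top form.
     */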
eeb4e84d 6320 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
b8295dfb
RH
6321}
6322
eeb4e84d
RH
6323TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
6324TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e 6325
f2be26a5
RH
6326TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
6327TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
6328TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
6329TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
6330TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
6331TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)
289a1797 6332
79828dcb
RH
6333TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
6334TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
5ff2838d 6335
6100d084
RH
6336static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
6337 const GVecGen2 ops[3])
5ff2838d 6338{
6100d084 6339 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
5ff2838d
RH
6340 return false;
6341 }
6342 if (sve_access_check(s)) {
6343 unsigned vsz = vec_full_reg_size(s);
6344 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
6345 vec_full_reg_offset(s, a->rn),
6346 vsz, vsz, &ops[a->esz]);
6347 }
6348 return true;
6349}
6350
6351static const TCGOpcode sqxtn_list[] = {
6352 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
6353};
6354
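/*
 * SQXTNB: saturate each double-width element to the signed range of the
 * narrow type (smax against min, smin against max) and keep only the low
 * half of each lane, so the odd (top) narrow elements are zeroed.
 */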
6355static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6356{
6357 TCGv_vec t = tcg_temp_new_vec_matching(d);
6358 int halfbits = 4 << vece;
6359 int64_t mask = (1ull << halfbits) - 1;
6360 int64_t min = -1ull << (halfbits - 1);
6361 int64_t max = -min - 1;
6362
6363 tcg_gen_dupi_vec(vece, t, min);
6364 tcg_gen_smax_vec(vece, d, n, t);
6365 tcg_gen_dupi_vec(vece, t, max);
6366 tcg_gen_smin_vec(vece, d, d, t);
6367 tcg_gen_dupi_vec(vece, t, mask);
6368 tcg_gen_and_vec(vece, d, d, t);
6369 tcg_temp_free_vec(t);
6370}
6371
6100d084
RH
6372static const GVecGen2 sqxtnb_ops[3] = {
6373 { .fniv = gen_sqxtnb_vec,
6374 .opt_opc = sqxtn_list,
6375 .fno = gen_helper_sve2_sqxtnb_h,
6376 .vece = MO_16 },
6377 { .fniv = gen_sqxtnb_vec,
6378 .opt_opc = sqxtn_list,
6379 .fno = gen_helper_sve2_sqxtnb_s,
6380 .vece = MO_32 },
6381 { .fniv = gen_sqxtnb_vec,
6382 .opt_opc = sqxtn_list,
6383 .fno = gen_helper_sve2_sqxtnb_d,
6384 .vece = MO_64 },
6385};
6386TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
5ff2838d
RH
6387
6388static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6389{
6390 TCGv_vec t = tcg_temp_new_vec_matching(d);
6391 int halfbits = 4 << vece;
6392 int64_t mask = (1ull << halfbits) - 1;
6393 int64_t min = -1ull << (halfbits - 1);
6394 int64_t max = -min - 1;
6395
6396 tcg_gen_dupi_vec(vece, t, min);
6397 tcg_gen_smax_vec(vece, n, n, t);
6398 tcg_gen_dupi_vec(vece, t, max);
6399 tcg_gen_smin_vec(vece, n, n, t);
6400 tcg_gen_shli_vec(vece, n, n, halfbits);
6401 tcg_gen_dupi_vec(vece, t, mask);
6402 tcg_gen_bitsel_vec(vece, d, t, d, n);
6403 tcg_temp_free_vec(t);
6404}
6405
6100d084
RH
6406static const GVecGen2 sqxtnt_ops[3] = {
6407 { .fniv = gen_sqxtnt_vec,
6408 .opt_opc = sqxtn_list,
6409 .load_dest = true,
6410 .fno = gen_helper_sve2_sqxtnt_h,
6411 .vece = MO_16 },
6412 { .fniv = gen_sqxtnt_vec,
6413 .opt_opc = sqxtn_list,
6414 .load_dest = true,
6415 .fno = gen_helper_sve2_sqxtnt_s,
6416 .vece = MO_32 },
6417 { .fniv = gen_sqxtnt_vec,
6418 .opt_opc = sqxtn_list,
6419 .load_dest = true,
6420 .fno = gen_helper_sve2_sqxtnt_d,
6421 .vece = MO_64 },
6422};
6423TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)
5ff2838d
RH
6424
6425static const TCGOpcode uqxtn_list[] = {
6426 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
6427};
6428
6429static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6430{
6431 TCGv_vec t = tcg_temp_new_vec_matching(d);
6432 int halfbits = 4 << vece;
6433 int64_t max = (1ull << halfbits) - 1;
6434
6435 tcg_gen_dupi_vec(vece, t, max);
6436 tcg_gen_umin_vec(vece, d, n, t);
6437 tcg_temp_free_vec(t);
6438}
6439
6100d084
RH
6440static const GVecGen2 uqxtnb_ops[3] = {
6441 { .fniv = gen_uqxtnb_vec,
6442 .opt_opc = uqxtn_list,
6443 .fno = gen_helper_sve2_uqxtnb_h,
6444 .vece = MO_16 },
6445 { .fniv = gen_uqxtnb_vec,
6446 .opt_opc = uqxtn_list,
6447 .fno = gen_helper_sve2_uqxtnb_s,
6448 .vece = MO_32 },
6449 { .fniv = gen_uqxtnb_vec,
6450 .opt_opc = uqxtn_list,
6451 .fno = gen_helper_sve2_uqxtnb_d,
6452 .vece = MO_64 },
6453};
6454TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
5ff2838d
RH
6455
6456static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6457{
6458 TCGv_vec t = tcg_temp_new_vec_matching(d);
6459 int halfbits = 4 << vece;
6460 int64_t max = (1ull << halfbits) - 1;
6461
6462 tcg_gen_dupi_vec(vece, t, max);
6463 tcg_gen_umin_vec(vece, n, n, t);
6464 tcg_gen_shli_vec(vece, n, n, halfbits);
6465 tcg_gen_bitsel_vec(vece, d, t, d, n);
6466 tcg_temp_free_vec(t);
6467}
6468
6100d084
RH
6469static const GVecGen2 uqxtnt_ops[3] = {
6470 { .fniv = gen_uqxtnt_vec,
6471 .opt_opc = uqxtn_list,
6472 .load_dest = true,
6473 .fno = gen_helper_sve2_uqxtnt_h,
6474 .vece = MO_16 },
6475 { .fniv = gen_uqxtnt_vec,
6476 .opt_opc = uqxtn_list,
6477 .load_dest = true,
6478 .fno = gen_helper_sve2_uqxtnt_s,
6479 .vece = MO_32 },
6480 { .fniv = gen_uqxtnt_vec,
6481 .opt_opc = uqxtn_list,
6482 .load_dest = true,
6483 .fno = gen_helper_sve2_uqxtnt_d,
6484 .vece = MO_64 },
6485};
6486TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)
5ff2838d
RH
6487
6488static const TCGOpcode sqxtun_list[] = {
6489 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
6490};
6491
6492static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6493{
6494 TCGv_vec t = tcg_temp_new_vec_matching(d);
6495 int halfbits = 4 << vece;
6496 int64_t max = (1ull << halfbits) - 1;
6497
6498 tcg_gen_dupi_vec(vece, t, 0);
6499 tcg_gen_smax_vec(vece, d, n, t);
6500 tcg_gen_dupi_vec(vece, t, max);
6501 tcg_gen_umin_vec(vece, d, d, t);
6502 tcg_temp_free_vec(t);
6503}
6504
6100d084
RH
6505static const GVecGen2 sqxtunb_ops[3] = {
6506 { .fniv = gen_sqxtunb_vec,
6507 .opt_opc = sqxtun_list,
6508 .fno = gen_helper_sve2_sqxtunb_h,
6509 .vece = MO_16 },
6510 { .fniv = gen_sqxtunb_vec,
6511 .opt_opc = sqxtun_list,
6512 .fno = gen_helper_sve2_sqxtunb_s,
6513 .vece = MO_32 },
6514 { .fniv = gen_sqxtunb_vec,
6515 .opt_opc = sqxtun_list,
6516 .fno = gen_helper_sve2_sqxtunb_d,
6517 .vece = MO_64 },
6518};
6519TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
5ff2838d
RH
6520
6521static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6522{
6523 TCGv_vec t = tcg_temp_new_vec_matching(d);
6524 int halfbits = 4 << vece;
6525 int64_t max = (1ull << halfbits) - 1;
6526
6527 tcg_gen_dupi_vec(vece, t, 0);
6528 tcg_gen_smax_vec(vece, n, n, t);
6529 tcg_gen_dupi_vec(vece, t, max);
6530 tcg_gen_umin_vec(vece, n, n, t);
6531 tcg_gen_shli_vec(vece, n, n, halfbits);
6532 tcg_gen_bitsel_vec(vece, d, t, d, n);
6533 tcg_temp_free_vec(t);
6534}
6535
6100d084
RH
6536static const GVecGen2 sqxtunt_ops[3] = {
6537 { .fniv = gen_sqxtunt_vec,
6538 .opt_opc = sqxtun_list,
6539 .load_dest = true,
6540 .fno = gen_helper_sve2_sqxtunt_h,
6541 .vece = MO_16 },
6542 { .fniv = gen_sqxtunt_vec,
6543 .opt_opc = sqxtun_list,
6544 .load_dest = true,
6545 .fno = gen_helper_sve2_sqxtunt_s,
6546 .vece = MO_32 },
6547 { .fniv = gen_sqxtunt_vec,
6548 .opt_opc = sqxtun_list,
6549 .load_dest = true,
6550 .fno = gen_helper_sve2_sqxtunt_d,
6551 .vece = MO_64 },
6552};
6553TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)
46d111b2 6554
f7f2f0fa
RH
6555static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
6556 const GVecGen2i ops[3])
46d111b2 6557{
f7f2f0fa 6558 if (a->esz < 0 || a->esz > MO_32) {
46d111b2
RH
6559 return false;
6560 }
6561 assert(a->imm > 0 && a->imm <= (8 << a->esz));
6562 if (sve_access_check(s)) {
6563 unsigned vsz = vec_full_reg_size(s);
6564 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6565 vec_full_reg_offset(s, a->rn),
6566 vsz, vsz, a->imm, &ops[a->esz]);
6567 }
6568 return true;
6569}
6570
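/*
 * SHRNB/SHRNT: shift each double-width element right by the immediate
 * and write the low half of the result to the even (B) or odd (T)
 * narrow elements; the T forms merge with the existing destination.
 */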
6571static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6572{
6573 int halfbits = 4 << vece;
6574 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6575
6576 tcg_gen_shri_i64(d, n, shr);
6577 tcg_gen_andi_i64(d, d, mask);
6578}
6579
6580static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6581{
6582 gen_shrnb_i64(MO_16, d, n, shr);
6583}
6584
6585static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6586{
6587 gen_shrnb_i64(MO_32, d, n, shr);
6588}
6589
6590static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6591{
6592 gen_shrnb_i64(MO_64, d, n, shr);
6593}
6594
6595static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
6596{
6597 TCGv_vec t = tcg_temp_new_vec_matching(d);
6598 int halfbits = 4 << vece;
6599 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
6600
6601 tcg_gen_shri_vec(vece, n, n, shr);
6602 tcg_gen_dupi_vec(vece, t, mask);
6603 tcg_gen_and_vec(vece, d, n, t);
6604 tcg_temp_free_vec(t);
6605}
6606
f7f2f0fa
RH
6607static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
6608static const GVecGen2i shrnb_ops[3] = {
6609 { .fni8 = gen_shrnb16_i64,
6610 .fniv = gen_shrnb_vec,
6611 .opt_opc = shrnb_vec_list,
6612 .fno = gen_helper_sve2_shrnb_h,
6613 .vece = MO_16 },
6614 { .fni8 = gen_shrnb32_i64,
6615 .fniv = gen_shrnb_vec,
6616 .opt_opc = shrnb_vec_list,
6617 .fno = gen_helper_sve2_shrnb_s,
6618 .vece = MO_32 },
6619 { .fni8 = gen_shrnb64_i64,
6620 .fniv = gen_shrnb_vec,
6621 .opt_opc = shrnb_vec_list,
6622 .fno = gen_helper_sve2_shrnb_d,
6623 .vece = MO_64 },
6624};
6625TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)
46d111b2
RH
6626
6627static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6628{
6629 int halfbits = 4 << vece;
6630 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6631
6632 tcg_gen_shli_i64(n, n, halfbits - shr);
6633 tcg_gen_andi_i64(n, n, ~mask);
6634 tcg_gen_andi_i64(d, d, mask);
6635 tcg_gen_or_i64(d, d, n);
6636}
6637
6638static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6639{
6640 gen_shrnt_i64(MO_16, d, n, shr);
6641}
6642
6643static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6644{
6645 gen_shrnt_i64(MO_32, d, n, shr);
6646}
6647
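/*
 * For MO_64 a single narrow element occupies the low 32 bits, so the
 * shifted source can simply be deposited into the high half of d.
 */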
6648static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6649{
6650 tcg_gen_shri_i64(n, n, shr);
6651 tcg_gen_deposit_i64(d, d, n, 32, 32);
6652}
6653
6654static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
6655{
6656 TCGv_vec t = tcg_temp_new_vec_matching(d);
6657 int halfbits = 4 << vece;
6658 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
6659
6660 tcg_gen_shli_vec(vece, n, n, halfbits - shr);
6661 tcg_gen_dupi_vec(vece, t, mask);
6662 tcg_gen_bitsel_vec(vece, d, t, d, n);
6663 tcg_temp_free_vec(t);
6664}
6665
f7f2f0fa
RH
6666static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
6667static const GVecGen2i shrnt_ops[3] = {
6668 { .fni8 = gen_shrnt16_i64,
6669 .fniv = gen_shrnt_vec,
6670 .opt_opc = shrnt_vec_list,
6671 .load_dest = true,
6672 .fno = gen_helper_sve2_shrnt_h,
6673 .vece = MO_16 },
6674 { .fni8 = gen_shrnt32_i64,
6675 .fniv = gen_shrnt_vec,
6676 .opt_opc = shrnt_vec_list,
6677 .load_dest = true,
6678 .fno = gen_helper_sve2_shrnt_s,
6679 .vece = MO_32 },
6680 { .fni8 = gen_shrnt64_i64,
6681 .fniv = gen_shrnt_vec,
6682 .opt_opc = shrnt_vec_list,
6683 .load_dest = true,
6684 .fno = gen_helper_sve2_shrnt_d,
6685 .vece = MO_64 },
6686};
6687TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)
46d111b2 6688
f7f2f0fa
RH
6689static const GVecGen2i rshrnb_ops[3] = {
6690 { .fno = gen_helper_sve2_rshrnb_h },
6691 { .fno = gen_helper_sve2_rshrnb_s },
6692 { .fno = gen_helper_sve2_rshrnb_d },
6693};
6694TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)
46d111b2 6695
f7f2f0fa
RH
6696static const GVecGen2i rshrnt_ops[3] = {
6697 { .fno = gen_helper_sve2_rshrnt_h },
6698 { .fno = gen_helper_sve2_rshrnt_s },
6699 { .fno = gen_helper_sve2_rshrnt_d },
6700};
6701TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
81fd3e6e
RH
6702
6703static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
6704 TCGv_vec n, int64_t shr)
6705{
6706 TCGv_vec t = tcg_temp_new_vec_matching(d);
6707 int halfbits = 4 << vece;
6708
6709 tcg_gen_sari_vec(vece, n, n, shr);
6710 tcg_gen_dupi_vec(vece, t, 0);
6711 tcg_gen_smax_vec(vece, n, n, t);
6712 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6713 tcg_gen_umin_vec(vece, d, n, t);
6714 tcg_temp_free_vec(t);
6715}
6716
f7f2f0fa
RH
6717static const TCGOpcode sqshrunb_vec_list[] = {
6718 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6719};
6720static const GVecGen2i sqshrunb_ops[3] = {
6721 { .fniv = gen_sqshrunb_vec,
6722 .opt_opc = sqshrunb_vec_list,
6723 .fno = gen_helper_sve2_sqshrunb_h,
6724 .vece = MO_16 },
6725 { .fniv = gen_sqshrunb_vec,
6726 .opt_opc = sqshrunb_vec_list,
6727 .fno = gen_helper_sve2_sqshrunb_s,
6728 .vece = MO_32 },
6729 { .fniv = gen_sqshrunb_vec,
6730 .opt_opc = sqshrunb_vec_list,
6731 .fno = gen_helper_sve2_sqshrunb_d,
6732 .vece = MO_64 },
6733};
6734TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
81fd3e6e
RH
6735
6736static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
6737 TCGv_vec n, int64_t shr)
6738{
6739 TCGv_vec t = tcg_temp_new_vec_matching(d);
6740 int halfbits = 4 << vece;
6741
6742 tcg_gen_sari_vec(vece, n, n, shr);
6743 tcg_gen_dupi_vec(vece, t, 0);
6744 tcg_gen_smax_vec(vece, n, n, t);
6745 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6746 tcg_gen_umin_vec(vece, n, n, t);
6747 tcg_gen_shli_vec(vece, n, n, halfbits);
6748 tcg_gen_bitsel_vec(vece, d, t, d, n);
6749 tcg_temp_free_vec(t);
6750}
6751
f7f2f0fa
RH
6752static const TCGOpcode sqshrunt_vec_list[] = {
6753 INDEX_op_shli_vec, INDEX_op_sari_vec,
6754 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6755};
6756static const GVecGen2i sqshrunt_ops[3] = {
6757 { .fniv = gen_sqshrunt_vec,
6758 .opt_opc = sqshrunt_vec_list,
6759 .load_dest = true,
6760 .fno = gen_helper_sve2_sqshrunt_h,
6761 .vece = MO_16 },
6762 { .fniv = gen_sqshrunt_vec,
6763 .opt_opc = sqshrunt_vec_list,
6764 .load_dest = true,
6765 .fno = gen_helper_sve2_sqshrunt_s,
6766 .vece = MO_32 },
6767 { .fniv = gen_sqshrunt_vec,
6768 .opt_opc = sqshrunt_vec_list,
6769 .load_dest = true,
6770 .fno = gen_helper_sve2_sqshrunt_d,
6771 .vece = MO_64 },
6772};
6773TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)
81fd3e6e 6774
f7f2f0fa
RH
6775static const GVecGen2i sqrshrunb_ops[3] = {
6776 { .fno = gen_helper_sve2_sqrshrunb_h },
6777 { .fno = gen_helper_sve2_sqrshrunb_s },
6778 { .fno = gen_helper_sve2_sqrshrunb_d },
6779};
6780TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)
81fd3e6e 6781
f7f2f0fa
RH
6782static const GVecGen2i sqrshrunt_ops[3] = {
6783 { .fno = gen_helper_sve2_sqrshrunt_h },
6784 { .fno = gen_helper_sve2_sqrshrunt_s },
6785 { .fno = gen_helper_sve2_sqrshrunt_d },
6786};
6787TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
c13418da 6788
743bb147
RH
6789static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
6790 TCGv_vec n, int64_t shr)
6791{
6792 TCGv_vec t = tcg_temp_new_vec_matching(d);
6793 int halfbits = 4 << vece;
6794 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6795 int64_t min = -max - 1;
6796
6797 tcg_gen_sari_vec(vece, n, n, shr);
6798 tcg_gen_dupi_vec(vece, t, min);
6799 tcg_gen_smax_vec(vece, n, n, t);
6800 tcg_gen_dupi_vec(vece, t, max);
6801 tcg_gen_smin_vec(vece, n, n, t);
6802 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6803 tcg_gen_and_vec(vece, d, n, t);
6804 tcg_temp_free_vec(t);
6805}
6806
f7f2f0fa
RH
6807static const TCGOpcode sqshrnb_vec_list[] = {
6808 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6809};
6810static const GVecGen2i sqshrnb_ops[3] = {
6811 { .fniv = gen_sqshrnb_vec,
6812 .opt_opc = sqshrnb_vec_list,
6813 .fno = gen_helper_sve2_sqshrnb_h,
6814 .vece = MO_16 },
6815 { .fniv = gen_sqshrnb_vec,
6816 .opt_opc = sqshrnb_vec_list,
6817 .fno = gen_helper_sve2_sqshrnb_s,
6818 .vece = MO_32 },
6819 { .fniv = gen_sqshrnb_vec,
6820 .opt_opc = sqshrnb_vec_list,
6821 .fno = gen_helper_sve2_sqshrnb_d,
6822 .vece = MO_64 },
6823};
6824TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
743bb147
RH
6825
6826static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
6827 TCGv_vec n, int64_t shr)
6828{
6829 TCGv_vec t = tcg_temp_new_vec_matching(d);
6830 int halfbits = 4 << vece;
6831 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6832 int64_t min = -max - 1;
6833
6834 tcg_gen_sari_vec(vece, n, n, shr);
6835 tcg_gen_dupi_vec(vece, t, min);
6836 tcg_gen_smax_vec(vece, n, n, t);
6837 tcg_gen_dupi_vec(vece, t, max);
6838 tcg_gen_smin_vec(vece, n, n, t);
6839 tcg_gen_shli_vec(vece, n, n, halfbits);
6840 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6841 tcg_gen_bitsel_vec(vece, d, t, d, n);
6842 tcg_temp_free_vec(t);
6843}
6844
f7f2f0fa
RH
6845static const TCGOpcode sqshrnt_vec_list[] = {
6846 INDEX_op_shli_vec, INDEX_op_sari_vec,
6847 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6848};
6849static const GVecGen2i sqshrnt_ops[3] = {
6850 { .fniv = gen_sqshrnt_vec,
6851 .opt_opc = sqshrnt_vec_list,
6852 .load_dest = true,
6853 .fno = gen_helper_sve2_sqshrnt_h,
6854 .vece = MO_16 },
6855 { .fniv = gen_sqshrnt_vec,
6856 .opt_opc = sqshrnt_vec_list,
6857 .load_dest = true,
6858 .fno = gen_helper_sve2_sqshrnt_s,
6859 .vece = MO_32 },
6860 { .fniv = gen_sqshrnt_vec,
6861 .opt_opc = sqshrnt_vec_list,
6862 .load_dest = true,
6863 .fno = gen_helper_sve2_sqshrnt_d,
6864 .vece = MO_64 },
6865};
6866TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)
743bb147 6867
f7f2f0fa
RH
6868static const GVecGen2i sqrshrnb_ops[3] = {
6869 { .fno = gen_helper_sve2_sqrshrnb_h },
6870 { .fno = gen_helper_sve2_sqrshrnb_s },
6871 { .fno = gen_helper_sve2_sqrshrnb_d },
6872};
6873TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)
743bb147 6874
f7f2f0fa
RH
6875static const GVecGen2i sqrshrnt_ops[3] = {
6876 { .fno = gen_helper_sve2_sqrshrnt_h },
6877 { .fno = gen_helper_sve2_sqrshrnt_s },
6878 { .fno = gen_helper_sve2_sqrshrnt_d },
6879};
6880TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
743bb147 6881
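/*
 * UQSHRNB/UQSHRNT: the unsigned forms need only a logical shift right
 * and an unsigned minimum against the half-element mask to saturate.
 */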
c13418da
RH
6882static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
6883 TCGv_vec n, int64_t shr)
6884{
6885 TCGv_vec t = tcg_temp_new_vec_matching(d);
6886 int halfbits = 4 << vece;
6887
6888 tcg_gen_shri_vec(vece, n, n, shr);
6889 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6890 tcg_gen_umin_vec(vece, d, n, t);
6891 tcg_temp_free_vec(t);
6892}
6893
f7f2f0fa
RH
6894static const TCGOpcode uqshrnb_vec_list[] = {
6895 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
6896};
6897static const GVecGen2i uqshrnb_ops[3] = {
6898 { .fniv = gen_uqshrnb_vec,
6899 .opt_opc = uqshrnb_vec_list,
6900 .fno = gen_helper_sve2_uqshrnb_h,
6901 .vece = MO_16 },
6902 { .fniv = gen_uqshrnb_vec,
6903 .opt_opc = uqshrnb_vec_list,
6904 .fno = gen_helper_sve2_uqshrnb_s,
6905 .vece = MO_32 },
6906 { .fniv = gen_uqshrnb_vec,
6907 .opt_opc = uqshrnb_vec_list,
6908 .fno = gen_helper_sve2_uqshrnb_d,
6909 .vece = MO_64 },
6910};
6911TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
c13418da
RH
6912
6913static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
6914 TCGv_vec n, int64_t shr)
6915{
6916 TCGv_vec t = tcg_temp_new_vec_matching(d);
6917 int halfbits = 4 << vece;
6918
6919 tcg_gen_shri_vec(vece, n, n, shr);
6920 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6921 tcg_gen_umin_vec(vece, n, n, t);
6922 tcg_gen_shli_vec(vece, n, n, halfbits);
6923 tcg_gen_bitsel_vec(vece, d, t, d, n);
6924 tcg_temp_free_vec(t);
6925}
6926
f7f2f0fa
RH
6927static const TCGOpcode uqshrnt_vec_list[] = {
6928 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
6929};
6930static const GVecGen2i uqshrnt_ops[3] = {
6931 { .fniv = gen_uqshrnt_vec,
6932 .opt_opc = uqshrnt_vec_list,
6933 .load_dest = true,
6934 .fno = gen_helper_sve2_uqshrnt_h,
6935 .vece = MO_16 },
6936 { .fniv = gen_uqshrnt_vec,
6937 .opt_opc = uqshrnt_vec_list,
6938 .load_dest = true,
6939 .fno = gen_helper_sve2_uqshrnt_s,
6940 .vece = MO_32 },
6941 { .fniv = gen_uqshrnt_vec,
6942 .opt_opc = uqshrnt_vec_list,
6943 .load_dest = true,
6944 .fno = gen_helper_sve2_uqshrnt_d,
6945 .vece = MO_64 },
6946};
6947TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)
c13418da 6948
f7f2f0fa
RH
6949static const GVecGen2i uqrshrnb_ops[3] = {
6950 { .fno = gen_helper_sve2_uqrshrnb_h },
6951 { .fno = gen_helper_sve2_uqrshrnb_s },
6952 { .fno = gen_helper_sve2_uqrshrnb_d },
6953};
6954TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)
c13418da 6955
f7f2f0fa
RH
6956static const GVecGen2i uqrshrnt_ops[3] = {
6957 { .fno = gen_helper_sve2_uqrshrnt_h },
6958 { .fno = gen_helper_sve2_uqrshrnt_s },
6959 { .fno = gen_helper_sve2_uqrshrnt_d },
6960};
6961TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)
b87dbeeb 6962
40d5ea50 6963#define DO_SVE2_ZZZ_NARROW(NAME, name) \
bd394cf5 6964 static gen_helper_gvec_3 * const name##_fns[4] = { \
40d5ea50
SL
6965 NULL, gen_helper_sve2_##name##_h, \
6966 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
6967 }; \
bd394cf5
RH
6968 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
6969 name##_fns[a->esz], a, 0)
40d5ea50
SL
6970
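/*
 * As an illustration, DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) expands to
 * roughly:
 *
 *     static gen_helper_gvec_3 * const addhnb_fns[4] = {
 *         NULL, gen_helper_sve2_addhnb_h,
 *         gen_helper_sve2_addhnb_s, gen_helper_sve2_addhnb_d,
 *     };
 *     TRANS_FEAT(ADDHNB, aa64_sve2, gen_gvec_ool_arg_zzz,
 *                addhnb_fns[a->esz], a, 0)
 *
 * The MO_8 entry is NULL: there is no byte-sized form of these
 * narrowing operations.
 */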
6971DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
6972DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
0ea3ff02
SL
6973DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
6974DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
40d5ea50 6975
c3cd6766
SL
6976DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
6977DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
e9443d10
SL
6978DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
6979DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
c3cd6766 6980
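/* MATCH and NMATCH are defined only for byte and halfword elements. */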
ef75309b
RH
6981static gen_helper_gvec_flags_4 * const match_fns[4] = {
6982 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
6983};
6984TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
e0ae6ec3 6985
ef75309b
RH
6986static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
6987 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
6988};
6989TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
e0ae6ec3 6990
5880bdc0
RH
6991static gen_helper_gvec_4 * const histcnt_fns[4] = {
6992 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
6993};
6994TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
6995 histcnt_fns[a->esz], a, 0)
7d47ac94 6996
bd394cf5
RH
6997TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
6998 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7d47ac94 6999
7de2617b
RH
7000DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
7001DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
7002DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
7003DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
7004DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
bfc9307e
RH
7005
7006/*
7007 * SVE Integer Multiply-Add (unpredicated)
7008 */
7009
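/*
 * FMMLA: 2x2 matrix multiply-accumulate, single precision (F32MM)
 * or double precision (F64MM).
 */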
25aee7cc
RH
7010TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
7011 a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
7012TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
7013 a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
4f26756b 7014
eeb4e84d
RH
7015static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
7016 NULL, gen_helper_sve2_sqdmlal_zzzw_h,
7017 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
7018};
7019TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7020 sqdmlal_zzzw_fns[a->esz], a, 0)
7021TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7022 sqdmlal_zzzw_fns[a->esz], a, 3)
7023TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7024 sqdmlal_zzzw_fns[a->esz], a, 2)
7025
7026static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
7027 NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
7028 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
7029};
7030TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7031 sqdmlsl_zzzw_fns[a->esz], a, 0)
7032TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7033 sqdmlsl_zzzw_fns[a->esz], a, 3)
7034TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7035 sqdmlsl_zzzw_fns[a->esz], a, 2)
7036
7037static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
7038 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
7039 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
7040};
7041TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7042 sqrdmlah_fns[a->esz], a, 0)
45a32e80 7043
eeb4e84d
RH
7044static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
7045 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
7046 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
7047};
7048TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7049 sqrdmlsh_fns[a->esz], a, 0)
45a32e80 7050
eeb4e84d
RH
7051static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
7052 NULL, gen_helper_sve2_smlal_zzzw_h,
7053 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
7054};
7055TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7056 smlal_zzzw_fns[a->esz], a, 0)
7057TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7058 smlal_zzzw_fns[a->esz], a, 1)
7059
7060static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
7061 NULL, gen_helper_sve2_umlal_zzzw_h,
7062 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
7063};
7064TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7065 umlal_zzzw_fns[a->esz], a, 0)
7066TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7067 umlal_zzzw_fns[a->esz], a, 1)
7068
7069static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
7070 NULL, gen_helper_sve2_smlsl_zzzw_h,
7071 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
7072};
7073TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7074 smlsl_zzzw_fns[a->esz], a, 0)
7075TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7076 smlsl_zzzw_fns[a->esz], a, 1)
7077
7078static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
7079 NULL, gen_helper_sve2_umlsl_zzzw_h,
7080 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
7081};
7082TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7083 umlsl_zzzw_fns[a->esz], a, 0)
7084TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7085 umlsl_zzzw_fns[a->esz], a, 1)
d782d3ca 7086
5f425b92
RH
7087static gen_helper_gvec_4 * const cmla_fns[] = {
7088 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
7089 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
7090};
7091TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7092 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
21068f39 7093
5f425b92
RH
7094static gen_helper_gvec_4 * const cdot_fns[] = {
7095 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
7096};
7097TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7098 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
d782d3ca 7099
5f425b92
RH
7100static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
7101 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
7102 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
7103};
7104TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7105 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
6a98cb2a 7106
8740d694
RH
7107TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7108 a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
b2bcd1be 7109
0ea3cdbf
RH
7110TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
7111 gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
3cc7a88e 7112
32e2ad65
RH
7113TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7114 gen_helper_crypto_aese, a, false)
7115TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7116 gen_helper_crypto_aese, a, true)
3cc7a88e 7117
32e2ad65
RH
7118TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7119 gen_helper_crypto_sm4e, a, 0)
7120TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7121 gen_helper_crypto_sm4ekey, a, 0)
3358eb3f 7122
2aa469ff 7123TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
5c1b7226 7124
0360730c
RH
7125TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
7126 gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
7127TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
7128 gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)
83c2523f 7129
0360730c
RH
7130TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
7131 gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)
83c2523f 7132
0360730c
RH
7133TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
7134 gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
7135TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
7136 gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)
95365277 7137
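/*
 * FCVTX/FCVTXNT narrow double to single using round-to-odd, reusing
 * the generic conversion helpers via do_frint_mode.
 */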
27645836
RH
7138TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
7139 float_round_to_odd, gen_helper_sve_fcvt_ds)
7140TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
7141 float_round_to_odd, gen_helper_sve2_fcvtnt_ds)
631be02e 7142
7b9dfcfe
RH
7143static gen_helper_gvec_3_ptr * const flogb_fns[] = {
7144 NULL, gen_helper_flogb_h,
7145 gen_helper_flogb_s, gen_helper_flogb_d
7146};
7147TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
7148 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
50d102bd
SL
7149
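/*
 * FMLAL/FMLSL (vectors): one helper serves all four forms; the SUB and
 * SEL (top/bottom) bits are packed into the descriptor data as
 * (sel << 1) | sub.
 */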
7150static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
7151{
41bf9b67
RH
7152 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
7153 a->rd, a->rn, a->rm, a->ra,
7154 (sel << 1) | sub, cpu_env);
50d102bd
SL
7155}
7156
72c7f906
RH
7157TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
7158TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
7159TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
7160TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)
50d102bd
SL
7161
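/*
 * FMLAL/FMLSL (indexed): the element index is packed above the SEL and
 * SUB bits, i.e. (index << 2) | (sel << 1) | sub.
 */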
7162static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
7163{
41bf9b67
RH
7164 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
7165 a->rd, a->rn, a->rm, a->ra,
7166 (a->index << 2) | (sel << 1) | sub, cpu_env);
50d102bd
SL
7167}
7168
fc7c8829
RH
7169TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
7170TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
7171TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
7172TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
2323c5ff 7173
eec05e4e
RH
7174TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7175 gen_helper_gvec_smmla_b, a, 0)
7176TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7177 gen_helper_gvec_usmmla_b, a, 0)
7178TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7179 gen_helper_gvec_ummla_b, a, 0)
cb8657f7 7180
eec05e4e
RH
7181TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
7182 gen_helper_gvec_bfdot, a, 0)
f3500a25
RH
7183TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
7184 gen_helper_gvec_bfdot_idx, a)
81266a1f 7185
eec05e4e
RH
7186TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
7187 gen_helper_gvec_bfmmla, a, 0)
5693887f
RH
7188
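/*
 * BFMLAL[BT]: widening BFloat16 multiply-add into single precision;
 * SEL chooses the bottom or top BF16 element of each 32-bit container.
 */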
7189static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7190{
41bf9b67
RH
7191 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
7192 a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
5693887f
RH
7193}
7194
698ddb9d
RH
7195TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
7196TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)
458d0ab6
RH
7197
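/*
 * BFMLAL[BT] (indexed): the index and SEL bit share the descriptor
 * data as (index << 1) | sel.
 */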
7198static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
7199{
41bf9b67
RH
7200 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
7201 a->rd, a->rn, a->rm, a->ra,
7202 (a->index << 1) | sel, FPST_FPCR);
458d0ab6
RH
7203}
7204
698ddb9d
RH
7205TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
7206TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)