]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Use TRANS_FEAT for DO_FP_IMM
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
/* Expander taking a 64-bit scalar operand plus an immediate. */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Out-of-line helpers that return a flags value in the TCGv_i32 result. */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Out-of-line helpers for contiguous and scatter/gather memory ops. */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
49/*
50 * Helpers for extracting complex instruction fields.
51 */
52
53/* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
451e4ffd 56static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
57{
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60}
61
451e4ffd 62static int tszimm_shr(DisasContext *s, int x)
ccd841c3 63{
451e4ffd 64 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
65}
66
67/* See e.g. LSL (immediate, predicated). */
451e4ffd 68static int tszimm_shl(DisasContext *s, int x)
ccd841c3 69{
451e4ffd 70 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
71}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
c4e7c493
RH
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    /* The unsigned same-size dtype for msz 0..3 is 0, 5, 10, 15. */
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
/* Return the offset info CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}
110
/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    /* sve_len is the vector length in bytes; one predicate bit per byte. */
    return s->sve_len >> 3;
}
116
516e246a
RH
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size > 8) {
        return QEMU_ALIGN_UP(size, 16);
    }
    return 8;
}
133
/* Return the predicate register size, rounded up for gvec use. */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
138
40e32e5a 139/* Invoke an out-of-line helper on 2 Zregs. */
c5edf07d 140static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
40e32e5a
RH
141 int rd, int rn, int data)
142{
c5edf07d
RH
143 if (fn == NULL) {
144 return false;
145 }
146 if (sve_access_check(s)) {
147 unsigned vsz = vec_full_reg_size(s);
148 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
149 vec_full_reg_offset(s, rn),
150 vsz, vsz, data, fn);
151 }
152 return true;
40e32e5a
RH
153}
154
de58c6b0
RH
155static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
156 int rd, int rn, int data,
157 ARMFPStatusFlavour flavour)
158{
159 if (fn == NULL) {
160 return false;
161 }
162 if (sve_access_check(s)) {
163 unsigned vsz = vec_full_reg_size(s);
164 TCGv_ptr status = fpstatus_ptr(flavour);
165
166 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
167 vec_full_reg_offset(s, rn),
168 status, vsz, vsz, data, fn);
169 tcg_temp_free_ptr(status);
170 }
171 return true;
172}
173
174static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
175 arg_rr_esz *a, int data)
176{
177 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
178 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
179}
180
e645d1a1 181/* Invoke an out-of-line helper on 3 Zregs. */
913a8a00 182static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
e645d1a1
RH
183 int rd, int rn, int rm, int data)
184{
913a8a00
RH
185 if (fn == NULL) {
186 return false;
187 }
188 if (sve_access_check(s)) {
189 unsigned vsz = vec_full_reg_size(s);
190 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
191 vec_full_reg_offset(s, rn),
192 vec_full_reg_offset(s, rm),
193 vsz, vsz, data, fn);
194 }
195 return true;
e645d1a1
RH
196}
197
84a272f5
RH
/* As gen_gvec_ool_zzz, with register numbers taken from decode. */
static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}
203
532724e4
RH
204/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
205static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
206 int rd, int rn, int rm,
207 int data, ARMFPStatusFlavour flavour)
208{
209 if (fn == NULL) {
210 return false;
211 }
212 if (sve_access_check(s)) {
213 unsigned vsz = vec_full_reg_size(s);
214 TCGv_ptr status = fpstatus_ptr(flavour);
215
216 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
217 vec_full_reg_offset(s, rn),
218 vec_full_reg_offset(s, rm),
219 status, vsz, vsz, data, fn);
220
221 tcg_temp_free_ptr(status);
222 }
223 return true;
224}
225
226static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
227 arg_rrr_esz *a, int data)
228{
229 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
230 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
231}
232
38650638 233/* Invoke an out-of-line helper on 4 Zregs. */
7ad416b1 234static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
38650638
RH
235 int rd, int rn, int rm, int ra, int data)
236{
7ad416b1
RH
237 if (fn == NULL) {
238 return false;
239 }
240 if (sve_access_check(s)) {
241 unsigned vsz = vec_full_reg_size(s);
242 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
243 vec_full_reg_offset(s, rn),
244 vec_full_reg_offset(s, rm),
245 vec_full_reg_offset(s, ra),
246 vsz, vsz, data, fn);
247 }
248 return true;
38650638
RH
249}
250
cab79ac9
RH
/* As gen_gvec_ool_zzzz, with register numbers taken from decode. */
static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}
256
e82d3536
RH
/* As gen_gvec_ool_zzzz, for indexed (by-element) forms; data = index. */
static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}
262
41bf9b67
RH
263/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
264static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
265 int rd, int rn, int rm, int ra,
266 int data, TCGv_ptr ptr)
267{
268 if (fn == NULL) {
269 return false;
270 }
271 if (sve_access_check(s)) {
272 unsigned vsz = vec_full_reg_size(s);
273 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
274 vec_full_reg_offset(s, rn),
275 vec_full_reg_offset(s, rm),
276 vec_full_reg_offset(s, ra),
277 ptr, vsz, vsz, data, fn);
278 }
279 return true;
280}
281
282static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
283 int rd, int rn, int rm, int ra,
284 int data, ARMFPStatusFlavour flavour)
285{
286 TCGv_ptr status = fpstatus_ptr(flavour);
287 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
288 tcg_temp_free_ptr(status);
289 return ret;
290}
291
e14da110
RH
292/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
293static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
294 int rd, int rn, int rm, int ra, int pg,
295 int data, ARMFPStatusFlavour flavour)
296{
297 if (fn == NULL) {
298 return false;
299 }
300 if (sve_access_check(s)) {
301 unsigned vsz = vec_full_reg_size(s);
302 TCGv_ptr status = fpstatus_ptr(flavour);
303
304 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
305 vec_full_reg_offset(s, rn),
306 vec_full_reg_offset(s, rm),
307 vec_full_reg_offset(s, ra),
308 pred_full_reg_offset(s, pg),
309 status, vsz, vsz, data, fn);
310
311 tcg_temp_free_ptr(status);
312 }
313 return true;
314}
315
96a461f7 316/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
8fb27a21 317static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
96a461f7
RH
318 int rd, int rn, int pg, int data)
319{
8fb27a21
RH
320 if (fn == NULL) {
321 return false;
322 }
323 if (sve_access_check(s)) {
324 unsigned vsz = vec_full_reg_size(s);
325 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
326 vec_full_reg_offset(s, rn),
327 pred_full_reg_offset(s, pg),
328 vsz, vsz, data, fn);
329 }
330 return true;
96a461f7
RH
331}
332
b051809a
RH
/* As gen_gvec_ool_zzp, with register numbers taken from decode. */
static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}
338
afa2529c
RH
/* As gen_gvec_ool_zzp, passing the decoded immediate as data. */
static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}
b051809a 344
0360730c
RH
345static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
346 int rd, int rn, int pg, int data,
347 ARMFPStatusFlavour flavour)
348{
349 if (fn == NULL) {
350 return false;
351 }
352 if (sve_access_check(s)) {
353 unsigned vsz = vec_full_reg_size(s);
354 TCGv_ptr status = fpstatus_ptr(flavour);
355
356 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
357 vec_full_reg_offset(s, rn),
358 pred_full_reg_offset(s, pg),
359 status, vsz, vsz, data, fn);
360 tcg_temp_free_ptr(status);
361 }
362 return true;
363}
364
/* As gen_gvec_fpst_zzp, with register numbers taken from decode. */
static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}
371
36cbb7a8 372/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
2a753d1e 373static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
36cbb7a8
RH
374 int rd, int rn, int rm, int pg, int data)
375{
2a753d1e
RH
376 if (fn == NULL) {
377 return false;
378 }
379 if (sve_access_check(s)) {
380 unsigned vsz = vec_full_reg_size(s);
381 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
382 vec_full_reg_offset(s, rn),
383 vec_full_reg_offset(s, rm),
384 pred_full_reg_offset(s, pg),
385 vsz, vsz, data, fn);
386 }
387 return true;
36cbb7a8 388}
f7d79c41 389
312016c9
RH
/* As gen_gvec_ool_zzzp, with register numbers taken from decode. */
static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}
395
7e2d07ff
RH
/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus fpst. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
417
418static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
419 arg_rprr_esz *a)
420{
421 return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
422 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
423}
424
faf915e2
RH
425/* Invoke a vector expander on two Zregs and an immediate. */
426static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
427 int esz, int rd, int rn, uint64_t imm)
428{
429 if (gvec_fn == NULL) {
430 return false;
431 }
432 if (sve_access_check(s)) {
433 unsigned vsz = vec_full_reg_size(s);
434 gvec_fn(esz, vec_full_reg_offset(s, rd),
435 vec_full_reg_offset(s, rn), imm, vsz, vsz);
436 }
437 return true;
438}
439
ada378f0
RH
/* As gen_gvec_fn_zzi, with fields from decode; rejects invalid tsz. */
static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}
449
39eea561 450/* Invoke a vector expander on three Zregs. */
50f6db5f 451static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
28c4da31 452 int esz, int rd, int rn, int rm)
38388f7e 453{
50f6db5f
RH
454 if (gvec_fn == NULL) {
455 return false;
456 }
457 if (sve_access_check(s)) {
458 unsigned vsz = vec_full_reg_size(s);
459 gvec_fn(esz, vec_full_reg_offset(s, rd),
460 vec_full_reg_offset(s, rn),
461 vec_full_reg_offset(s, rm), vsz, vsz);
462 }
463 return true;
38388f7e
RH
464}
465
cd54bbe6
RH
/* As gen_gvec_fn_zzz, with fields taken from decode. */
static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}
471
911cdc6d 472/* Invoke a vector expander on four Zregs. */
189876af
RH
473static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
474 arg_rrrr_esz *a)
911cdc6d 475{
189876af
RH
476 if (gvec_fn == NULL) {
477 return false;
478 }
479 if (sve_access_check(s)) {
480 unsigned vsz = vec_full_reg_size(s);
481 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
482 vec_full_reg_offset(s, a->rn),
483 vec_full_reg_offset(s, a->rm),
484 vec_full_reg_offset(s, a->ra), vsz, vsz);
485 }
486 return true;
911cdc6d
RH
487}
488
39eea561
RH
489/* Invoke a vector move on two Zregs. */
490static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 491{
f7d79c41 492 if (sve_access_check(s)) {
5f730621
RH
493 unsigned vsz = vec_full_reg_size(s);
494 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
495 vec_full_reg_offset(s, rn), vsz, vsz);
f7d79c41
RH
496 }
497 return true;
38388f7e
RH
498}
499
d9d78dcc
RH
/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}
506
516e246a 507/* Invoke a vector expander on three Pregs. */
23e5fa5f 508static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
dd81a8d7 509 int rd, int rn, int rm)
516e246a 510{
23e5fa5f
RH
511 if (sve_access_check(s)) {
512 unsigned psz = pred_gvec_reg_size(s);
513 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
514 pred_full_reg_offset(s, rn),
515 pred_full_reg_offset(s, rm), psz, psz);
516 }
517 return true;
516e246a
RH
518}
519
520/* Invoke a vector move on two Pregs. */
521static bool do_mov_p(DisasContext *s, int rd, int rn)
522{
d0b2df5a
RH
523 if (sve_access_check(s)) {
524 unsigned psz = pred_gvec_reg_size(s);
525 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
526 pred_full_reg_offset(s, rn), psz, psz);
527 }
528 return true;
516e246a
RH
529}
530
9e18d7a6
RH
/* Set the cpu flags as per a return from an SVE helper:
 * N = the full result (sign bit), Z = bit 1, C = bit 0, V = 0.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
539
/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    /* Single-word form: helper computes the flags value directly. */
    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
549
/* PredTest over a multi-word predicate: D and G are env offsets. */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
566
028e2a7b
RH
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,  /* MO_8, MO_16 */
    0x1111111111111111ull, 0x0101010101010101ull   /* MO_32, MO_64 */
};
572
c437c59b
RH
/* Decode-table catch-all: raise an unallocated-encoding exception. */
static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}
578
39eea561
RH
/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
d1822297 587
e6eba6e5
RH
/* Per-byte rotate right of (n ^ m) by sh, within a 64-bit lane group. */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);        /* high part of each byte */
    tcg_gen_shli_i64(t, t, 8 - sh);    /* low part of each byte */
    tcg_gen_andi_i64(d, d, mask);      /* drop bits from the neighbour byte */
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}
601
/* Per-halfword rotate right of (n ^ m) by sh, within a 64-bit lane group. */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);        /* high part of each halfword */
    tcg_gen_shli_i64(t, t, 16 - sh);   /* low part of each halfword */
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}
615
/* 32-bit lane: a native rotate of the xor is available. */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}
621
/* 64-bit lane: a native rotate of the xor is available. */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}
627
/* Vector form: xor then rotate-right-by-immediate per element. */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
634
/* Expand XAR: rd = rotr(rn ^ rm, shift), per element of size vece. */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
677
678static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
679{
680 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
681 return false;
682 }
683 if (sve_access_check(s)) {
684 unsigned vsz = vec_full_reg_size(s);
685 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
686 vec_full_reg_offset(s, a->rn),
687 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
688 }
689 return true;
690}
691
911cdc6d
RH
/* d = n ^ m ^ k, on 64-bit lanes. */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}
697
/* Vector form of three-way exclusive or. */
static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}
704
/* Expand EOR3 as a gvec operation; esz is ignored, operating on MO_64. */
static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)
911cdc6d
RH
719
/* d = n ^ (m & ~k), on 64-bit lanes. */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}
725
/* Vector form: d = n ^ (m & ~k). */
static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}
732
/* Expand BCAX (bit clear and xor) as a gvec operation. */
static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)
911cdc6d
RH
747
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
911cdc6d
RH
756
/* d = (~n & k) | (m & ~k); n and m are clobbered as scratch. */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);   /* n = k & ~n */
    tcg_gen_andc_i64(m, m, k);   /* m = m & ~k */
    tcg_gen_or_i64(d, n, m);
}
763
/* Vector BSL1N; n and m are clobbered as scratch. */
static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* Invert n, then select with k: d = (k ? ~n : m). */
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}
776
/* Expand BSL1N (bitwise select with inverted first input) via gvec. */
static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
911cdc6d
RH
791
/* BSL2N on 64-bit lanes; n and m are clobbered as scratch. */
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}
807
/* Vector BSL2N; n and m are clobbered as scratch. */
static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* Invert m, then select with k: d = (k ? n : ~m). */
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}
820
/* Expand BSL2N (bitwise select with inverted second input) via gvec. */
static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
911cdc6d
RH
835
/* d = ~((n & k) | (m & ~k)); n and m are clobbered as scratch. */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}
842
/* Vector NBSL: inverted bitwise select. */
static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}
849
/* Expand NBSL (inverted bitwise select) via gvec. */
static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
911cdc6d 864
fea98f9c
RH
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
fea98f9c 875
f97cfd59
RH
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
891
8e7acb24
RH
/* Expand a predicated binary Z-op via per-element-size OOL helpers. */
#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \
    }; \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

/* SDIV/UDIV exist only for word and doubleword element sizes. */
static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
d3fe4a29 934
afac6d04
RH
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

/* Expand a predicated unary Z-op via per-element-size OOL helpers. */
#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = { \
        gen_helper_##name##_b, gen_helper_##name##_h, \
        gen_helper_##name##_s, gen_helper_##name##_d, \
    }; \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

/* FABS/FNEG have no byte form; SXTB/UXTB have no byte form either. */
static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL, gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL, gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL, gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL, gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

/* SXTW/UXTW are valid only for doubleword elements. */
TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
afac6d04 993
047cec97
RH
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
/* Reduce Zn under Pg via an OOL helper, writing the scalar result to Vd. */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
1029
/*
 * Instantiate the helper table and translator for one reduction insn
 * that has all four element sizes.
 */
#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = { \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    }; \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV has no doubleword form, so it cannot use DO_VPZ. */
static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ
1054
ccd841c3
RH
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    /* The invert flag is passed to the helper via the simd data field. */
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}
1072
73c558a8
RH
1073static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
1074 gen_helper_gvec_3 * const fns[4])
ccd841c3 1075{
73c558a8
RH
1076 int max;
1077
ccd841c3
RH
1078 if (a->esz < 0) {
1079 /* Invalid tsz encoding -- see tszimm_esz. */
1080 return false;
1081 }
73c558a8
RH
1082
1083 /*
1084 * Shift by element size is architecturally valid.
1085 * For arithmetic right-shift, it's the same as by one less.
1086 * For logical shifts and ASRD, it is a zeroing operation.
1087 */
1088 max = 8 << a->esz;
1089 if (a->imm >= max) {
1090 if (asr) {
1091 a->imm = max - 1;
1092 } else {
1093 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
1094 }
1095 }
afa2529c 1096 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
1097}
1098
5cccd1f1
RH
/* Predicated shift right, arithmetic, by immediate. */
static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

/* Predicated shift right, logical, by immediate. */
static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

/* Predicated shift left by immediate. */
static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

/* Predicated shift right and round, for signed divide by power of two. */
static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

/*
 * SVE2 saturating/rounding shifts by immediate.  These go straight to
 * gen_gvec_ool_arg_zpzi, so the esz < 0 (bad tsz) check happens inline.
 */
static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
a5421b54 1157
fe7f8dfb
RH
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/*
 * Predicated shifts by a wide (doubleword) shift count.  No doubleword
 * destination form exists, hence the NULL fourth entry.
 */
#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = { \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
        gen_helper_sve_##name##_zpzw_s, NULL \
    }; \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz, \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1175
d9d78dcc
RH
1176/*
1177 *** SVE Bitwise Shift - Unpredicated Group
1178 */
1179
1180static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
1181 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
1182 int64_t, uint32_t, uint32_t))
1183{
1184 if (a->esz < 0) {
1185 /* Invalid tsz encoding -- see tszimm_esz. */
1186 return false;
1187 }
1188 if (sve_access_check(s)) {
1189 unsigned vsz = vec_full_reg_size(s);
1190 /* Shift by element size is architecturally valid. For
1191 arithmetic right-shift, it's the same as by one less.
1192 Otherwise it is a zeroing operation. */
1193 if (a->imm >= 8 << a->esz) {
1194 if (asr) {
1195 a->imm = (8 << a->esz) - 1;
1196 } else {
1197 do_dupi_z(s, a->rd, 0);
1198 return true;
1199 }
1200 }
1201 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
1202 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
1203 }
1204 return true;
1205}
1206
5e612f80
RH
/* Unpredicated shifts by immediate, expanded inline via gvec. */
TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

/*
 * Unpredicated shifts by a wide (doubleword) shift count; no doubleword
 * destination form exists, hence the NULL fourth entry.
 */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = { \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
        gen_helper_sve_##name##_zzw_s, NULL \
    }; \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1224
96a36e4a
RH
/*
 *** SVE Integer Multiply-Add Group
 */

/*
 * Expand a predicated multiply-add (Zd = Za +/- Zn * Zm, governed by Pg)
 * through a 5-operand out-of-line helper.
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
96a36e4a 1255
9a56c9c3
RH
/*
 *** SVE Index Generation Group
 */

/*
 * Expand INDEX: Zd[i] = start + i * incr.  The doubleword form takes the
 * 64-bit operands directly; narrower forms truncate them to 32 bits.
 */
static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* Truncate the 64-bit inputs to the helper's 32-bit operands. */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    return true;
}

/* The four INDEX forms: imm/imm, imm/reg, reg/imm, reg/reg. */
TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
9a56c9c3 1307
96f922cc
RH
1308/*
1309 *** SVE Stack Allocation Group
1310 */
1311
3a7be554 1312static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1313{
5de56742
AC
1314 if (sve_access_check(s)) {
1315 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1316 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1317 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1318 }
96f922cc
RH
1319 return true;
1320}
1321
3a7be554 1322static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1323{
5de56742
AC
1324 if (sve_access_check(s)) {
1325 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1326 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1327 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1328 }
96f922cc
RH
1329 return true;
1330}
1331
3a7be554 1332static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1333{
5de56742
AC
1334 if (sve_access_check(s)) {
1335 TCGv_i64 reg = cpu_reg(s, a->rd);
1336 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1337 }
96f922cc
RH
1338 return true;
1339}
1340
4b242d9c
RH
/*
 *** SVE Compute Vector Address Group
 */

/* Expand ADR via an out-of-line helper; imm is the shift amount. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
4b242d9c 1354
0762cd42
RH
/*
 *** SVE Integer Misc - Unpredicated Group
 */

/* FEXPA has no byte-sized form, hence NULL at [0]. */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)

/* FTSSEL has no byte-sized form, hence NULL at [0]. */
static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
a1f233f2 1371
516e246a
RH
/*
 *** SVE Predicate Logical Operations Group
 */

/*
 * Expand a predicate-to-predicate logical operation, optionally setting
 * NZCV (a->s).  The non-flag-setting path is a plain gvec expansion;
 * the flag-setting path also runs the PTEST-style predicate test.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1431
/* Pd = Pn & Pm & Pg, 64-bit scalar expansion. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = Pn & Pm & Pg, host-vector expansion. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Without flag setting, several operand aliases reduce to fewer ANDs. */
    if (!a->s) {
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                /* All three sources equal: a plain move. */
                return do_mov_p(s, a->rd, a->rn);
            }
            /* Pn == Pm: Pd = Pn & Pg. */
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            /* Pg duplicates one source: Pd = Pn & Pm. */
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}
1466
/* Pd = (Pn & ~Pm) & Pg, 64-bit scalar expansion. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn & ~Pm) & Pg, host-vector expansion. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Without flags, Pg == Pn collapses to a two-operand ANDC. */
    if (!a->s && a->pg == a->rn) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}
1494
/* Pd = (Pn ^ Pm) & Pg, 64-bit scalar expansion. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn ^ Pm) & Pg, host-vector expansion. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1523
3a7be554 1524static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1525{
516e246a
RH
1526 if (a->s) {
1527 return false;
516e246a 1528 }
d4bc6232
RH
1529 if (sve_access_check(s)) {
1530 unsigned psz = pred_gvec_reg_size(s);
1531 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1532 pred_full_reg_offset(s, a->pg),
1533 pred_full_reg_offset(s, a->rn),
1534 pred_full_reg_offset(s, a->rm), psz, psz);
1535 }
1536 return true;
516e246a
RH
1537}
1538
/* Pd = (Pn | Pm) & Pg, 64-bit scalar expansion. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn | Pm) & Pg, host-vector expansion. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Without flags, all-equal sources reduce to a plain move (MOV alias). */
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1566
/* Pd = (Pn | ~Pm) & Pg, 64-bit scalar expansion. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn | ~Pm) & Pg, host-vector expansion. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1590
/* Pd = ~(Pn | Pm) & Pg, 64-bit scalar expansion. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Pd = ~(Pn | Pm) & Pg, host-vector expansion. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1614
/* Pd = ~(Pn & Pm) & Pg, 64-bit scalar expansion. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Pd = ~(Pn & Pm) & Pg, host-vector expansion. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1638
9e18d7a6
RH
/*
 *** SVE Predicate Misc Group
 */

/* PTEST: set NZCV from the predicate test of Pn under governing Pg. */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Single-word predicate: test inline in 64-bit temps. */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1666
028e2a7b
RH
/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        /* VL1-VL8 encode their count directly. */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        /* VL16-VL256 are successive powers of two from 16. */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default: /* #uimm5 */
        return 0;
    }
    /* Fixed VL patterns yield 0 when the vector is too short. */
    return elements >= bound ? bound : 0;
}
1704
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Partial final word: mask off bits beyond the set size. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* Uniform contents: try a single gvec dup over the set region. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Fall back to storing word by word, in bytes from here on. */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        /* Zero the remainder of the predicate register. */
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1784
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR/WRFFR without a predicate are plain moves to/from the FFR. */
TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
028e2a7b
RH
1807
/*
 * Shared expansion for PFIRST and PNEXT: call the helper with a
 * PREDDESC-encoded descriptor and update NZCV from its return value.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    /* The helper returns the packed flags value. */
    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
028e2a7b 1839
24e82e68
RH
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtract and clamp at the 32-bit minimum. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Add and clamp at the 32-bit maximum. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1868
/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: select 0 when reg < val would wrap. */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: select all-ones when the sum wrapped. */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1914
/* Similarly with a vector and a scalar operand. */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    /*
     * Only addition helpers exist below MO_64; subtraction is handled
     * by negating the (positive) operand first.
     */
    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        /* Unsigned MO_64 has a dedicated subtract helper. */
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
1998
3a7be554 1999static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
2000{
2001 if (sve_access_check(s)) {
2002 unsigned fullsz = vec_full_reg_size(s);
2003 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2004 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2005 }
2006 return true;
2007}
2008
3a7be554 2009static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2010{
2011 if (sve_access_check(s)) {
2012 unsigned fullsz = vec_full_reg_size(s);
2013 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2014 int inc = numelem * a->imm * (a->d ? -1 : 1);
2015 TCGv_i64 reg = cpu_reg(s, a->rd);
2016
2017 tcg_gen_addi_i64(reg, reg, inc);
2018 }
2019 return true;
2020}
2021
/* Saturating INC/DEC of a 32-bit view of Xd, sign/zero-extended to 64. */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        /* Nothing to add: just perform the 32->64 extension. */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
2045
3a7be554 2046static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
2047{
2048 if (!sve_access_check(s)) {
2049 return true;
2050 }
2051
2052 unsigned fullsz = vec_full_reg_size(s);
2053 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2054 int inc = numelem * a->imm;
2055 TCGv_i64 reg = cpu_reg(s, a->rd);
2056
2057 if (inc != 0) {
d681f125 2058 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2059 }
2060 return true;
2061}
2062
3a7be554 2063static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2064{
2065 if (a->esz == 0) {
2066 return false;
2067 }
2068
2069 unsigned fullsz = vec_full_reg_size(s);
2070 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2071 int inc = numelem * a->imm;
2072
2073 if (inc != 0) {
2074 if (sve_access_check(s)) {
24e82e68
RH
2075 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
2076 vec_full_reg_offset(s, a->rn),
d681f125
RH
2077 tcg_constant_i64(a->d ? -inc : inc),
2078 fullsz, fullsz);
24e82e68
RH
2079 }
2080 } else {
2081 do_mov_z(s, a->rd, a->rn);
2082 }
2083 return true;
2084}
2085
3a7be554 2086static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
2087{
2088 if (a->esz == 0) {
2089 return false;
2090 }
2091
2092 unsigned fullsz = vec_full_reg_size(s);
2093 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2094 int inc = numelem * a->imm;
2095
2096 if (inc != 0) {
2097 if (sve_access_check(s)) {
d681f125
RH
2098 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
2099 tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2100 }
2101 } else {
2102 do_mov_z(s, a->rd, a->rn);
2103 }
2104 return true;
2105}
2106
e1fa1164
RH
2107/*
2108 *** SVE Bitwise Immediate Group
2109 */
2110
2111static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2112{
2113 uint64_t imm;
2114 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2115 extract32(a->dbm, 0, 6),
2116 extract32(a->dbm, 6, 6))) {
2117 return false;
2118 }
faf915e2 2119 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
e1fa1164
RH
2120}
2121
15a314da
RH
2122TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
2123TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
2124TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
e1fa1164 2125
3a7be554 2126static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2127{
2128 uint64_t imm;
2129 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2130 extract32(a->dbm, 0, 6),
2131 extract32(a->dbm, 6, 6))) {
2132 return false;
2133 }
2134 if (sve_access_check(s)) {
2135 do_dupi_z(s, a->rd, imm);
2136 }
2137 return true;
2138}
2139
f25a2361
RH
2140/*
2141 *** SVE Integer Wide Immediate - Predicated Group
2142 */
2143
2144/* Implement all merging copies. This is used for CPY (immediate),
2145 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2146 */
2147static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2148 TCGv_i64 val)
2149{
2150 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2151 static gen_cpy * const fns[4] = {
2152 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2153 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2154 };
2155 unsigned vsz = vec_full_reg_size(s);
c6a59b55 2156 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
f25a2361
RH
2157 TCGv_ptr t_zd = tcg_temp_new_ptr();
2158 TCGv_ptr t_zn = tcg_temp_new_ptr();
2159 TCGv_ptr t_pg = tcg_temp_new_ptr();
2160
2161 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2162 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2163 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2164
2165 fns[esz](t_zd, t_zn, t_pg, val, desc);
2166
2167 tcg_temp_free_ptr(t_zd);
2168 tcg_temp_free_ptr(t_zn);
2169 tcg_temp_free_ptr(t_pg);
f25a2361
RH
2170}
2171
3a7be554 2172static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361
RH
2173{
2174 if (a->esz == 0) {
2175 return false;
2176 }
2177 if (sve_access_check(s)) {
2178 /* Decode the VFP immediate. */
2179 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
e152b48b 2180 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
f25a2361
RH
2181 }
2182 return true;
2183}
2184
3a7be554 2185static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 2186{
f25a2361 2187 if (sve_access_check(s)) {
e152b48b 2188 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
f25a2361
RH
2189 }
2190 return true;
2191}
2192
3a7be554 2193static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
f25a2361
RH
2194{
2195 static gen_helper_gvec_2i * const fns[4] = {
2196 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2197 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2198 };
2199
f25a2361
RH
2200 if (sve_access_check(s)) {
2201 unsigned vsz = vec_full_reg_size(s);
f25a2361
RH
2202 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2203 pred_full_reg_offset(s, a->pg),
e152b48b
RH
2204 tcg_constant_i64(a->imm),
2205 vsz, vsz, 0, fns[a->esz]);
f25a2361
RH
2206 }
2207 return true;
2208}
2209
b94f8f60
RH
2210/*
2211 *** SVE Permute Extract Group
2212 */
2213
75114792 2214static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
b94f8f60
RH
2215{
2216 if (!sve_access_check(s)) {
2217 return true;
2218 }
2219
2220 unsigned vsz = vec_full_reg_size(s);
75114792 2221 unsigned n_ofs = imm >= vsz ? 0 : imm;
b94f8f60 2222 unsigned n_siz = vsz - n_ofs;
75114792
SL
2223 unsigned d = vec_full_reg_offset(s, rd);
2224 unsigned n = vec_full_reg_offset(s, rn);
2225 unsigned m = vec_full_reg_offset(s, rm);
b94f8f60
RH
2226
2227 /* Use host vector move insns if we have appropriate sizes
2228 * and no unfortunate overlap.
2229 */
2230 if (m != d
2231 && n_ofs == size_for_gvec(n_ofs)
2232 && n_siz == size_for_gvec(n_siz)
2233 && (d != n || n_siz <= n_ofs)) {
2234 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2235 if (n_ofs != 0) {
2236 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2237 }
2238 } else {
2239 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2240 }
2241 return true;
2242}
2243
c799c115
RH
2244TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
2245TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
75114792 2246
30562ab7
RH
2247/*
2248 *** SVE Permute - Unpredicated Group
2249 */
2250
3a7be554 2251static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2252{
2253 if (sve_access_check(s)) {
2254 unsigned vsz = vec_full_reg_size(s);
2255 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2256 vsz, vsz, cpu_reg_sp(s, a->rn));
2257 }
2258 return true;
2259}
2260
3a7be554 2261static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
30562ab7
RH
2262{
2263 if ((a->imm & 0x1f) == 0) {
2264 return false;
2265 }
2266 if (sve_access_check(s)) {
2267 unsigned vsz = vec_full_reg_size(s);
2268 unsigned dofs = vec_full_reg_offset(s, a->rd);
2269 unsigned esz, index;
2270
2271 esz = ctz32(a->imm);
2272 index = a->imm >> (esz + 1);
2273
2274 if ((index << esz) < vsz) {
2275 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2276 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2277 } else {
7e17d50e
RH
2278 /*
2279 * While dup_mem handles 128-bit elements, dup_imm does not.
2280 * Thankfully element size doesn't matter for splatting zero.
2281 */
2282 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
30562ab7
RH
2283 }
2284 }
2285 return true;
2286}
2287
2288static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2289{
2290 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2291 static gen_insr * const fns[4] = {
2292 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2293 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2294 };
2295 unsigned vsz = vec_full_reg_size(s);
c6a59b55 2296 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
30562ab7
RH
2297 TCGv_ptr t_zd = tcg_temp_new_ptr();
2298 TCGv_ptr t_zn = tcg_temp_new_ptr();
2299
2300 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2301 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2302
2303 fns[a->esz](t_zd, t_zn, val, desc);
2304
2305 tcg_temp_free_ptr(t_zd);
2306 tcg_temp_free_ptr(t_zn);
30562ab7
RH
2307}
2308
3a7be554 2309static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2310{
2311 if (sve_access_check(s)) {
2312 TCGv_i64 t = tcg_temp_new_i64();
2313 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2314 do_insr_i64(s, a, t);
2315 tcg_temp_free_i64(t);
2316 }
2317 return true;
2318}
2319
3a7be554 2320static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2321{
2322 if (sve_access_check(s)) {
2323 do_insr_i64(s, a, cpu_reg(s, a->rm));
2324 }
2325 return true;
2326}
2327
0ea3cdbf
RH
2328static gen_helper_gvec_2 * const rev_fns[4] = {
2329 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2330 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2331};
2332TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
30562ab7 2333
32e2ad65
RH
2334static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
2335 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2336 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2337};
2338TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
30562ab7 2339
5f425b92
RH
2340static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
2341 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2342 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2343};
2344TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
2345 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
80a712a2 2346
32e2ad65
RH
2347static gen_helper_gvec_3 * const tbx_fns[4] = {
2348 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2349 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2350};
2351TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
80a712a2 2352
3a7be554 2353static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2354{
2355 static gen_helper_gvec_2 * const fns[4][2] = {
2356 { NULL, NULL },
2357 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2358 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2359 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2360 };
2361
2362 if (a->esz == 0) {
2363 return false;
2364 }
2365 if (sve_access_check(s)) {
2366 unsigned vsz = vec_full_reg_size(s);
2367 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2368 vec_full_reg_offset(s, a->rn)
2369 + (a->h ? vsz / 2 : 0),
2370 vsz, vsz, 0, fns[a->esz][a->u]);
2371 }
2372 return true;
2373}
2374
d731d8cb
RH
2375/*
2376 *** SVE Permute - Predicates Group
2377 */
2378
2379static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2380 gen_helper_gvec_3 *fn)
2381{
2382 if (!sve_access_check(s)) {
2383 return true;
2384 }
2385
2386 unsigned vsz = pred_full_reg_size(s);
2387
d731d8cb
RH
2388 TCGv_ptr t_d = tcg_temp_new_ptr();
2389 TCGv_ptr t_n = tcg_temp_new_ptr();
2390 TCGv_ptr t_m = tcg_temp_new_ptr();
f9b0fcce 2391 uint32_t desc = 0;
d731d8cb 2392
f9b0fcce
RH
2393 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2394 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2395 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2396
2397 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2398 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2399 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
d731d8cb 2400
c6a59b55 2401 fn(t_d, t_n, t_m, tcg_constant_i32(desc));
d731d8cb
RH
2402
2403 tcg_temp_free_ptr(t_d);
2404 tcg_temp_free_ptr(t_n);
2405 tcg_temp_free_ptr(t_m);
d731d8cb
RH
2406 return true;
2407}
2408
2409static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2410 gen_helper_gvec_2 *fn)
2411{
2412 if (!sve_access_check(s)) {
2413 return true;
2414 }
2415
2416 unsigned vsz = pred_full_reg_size(s);
2417 TCGv_ptr t_d = tcg_temp_new_ptr();
2418 TCGv_ptr t_n = tcg_temp_new_ptr();
70acaafe 2419 uint32_t desc = 0;
d731d8cb
RH
2420
2421 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2422 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2423
70acaafe
RH
2424 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2425 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2426 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb 2427
c6a59b55 2428 fn(t_d, t_n, tcg_constant_i32(desc));
d731d8cb 2429
d731d8cb
RH
2430 tcg_temp_free_ptr(t_d);
2431 tcg_temp_free_ptr(t_n);
2432 return true;
2433}
2434
bdb349f5
RH
2435TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
2436TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
2437TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
2438TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
2439TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
2440TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
d731d8cb 2441
1d0fce4b
RH
2442TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
2443TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
2444TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
d731d8cb 2445
234b48e9
RH
2446/*
2447 *** SVE Permute - Interleaving Group
2448 */
2449
a95b9618
RH
2450static gen_helper_gvec_3 * const zip_fns[4] = {
2451 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2452 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2453};
2454TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2455 zip_fns[a->esz], a, 0)
2456TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2457 zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
2458
2459TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2460 gen_helper_sve2_zip_q, a, 0)
2461TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2462 gen_helper_sve2_zip_q, a,
2463 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
74b64b25 2464
234b48e9
RH
2465static gen_helper_gvec_3 * const uzp_fns[4] = {
2466 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2467 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2468};
2469
32e2ad65
RH
2470TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2471 uzp_fns[a->esz], a, 0)
2472TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2473 uzp_fns[a->esz], a, 1 << a->esz)
234b48e9 2474
32e2ad65
RH
2475TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2476 gen_helper_sve2_uzp_q, a, 0)
2477TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2478 gen_helper_sve2_uzp_q, a, 16)
74b64b25 2479
234b48e9
RH
2480static gen_helper_gvec_3 * const trn_fns[4] = {
2481 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2482 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2483};
2484
32e2ad65
RH
2485TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2486 trn_fns[a->esz], a, 0)
2487TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2488 trn_fns[a->esz], a, 1 << a->esz)
234b48e9 2489
32e2ad65
RH
2490TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2491 gen_helper_sve2_trn_q, a, 0)
2492TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2493 gen_helper_sve2_trn_q, a, 16)
74b64b25 2494
3ca879ae
RH
2495/*
2496 *** SVE Permute Vector - Predicated Group
2497 */
2498
817bd5c9
RH
2499static gen_helper_gvec_3 * const compact_fns[4] = {
2500 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2501};
2502TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
3ca879ae 2503
ef23cb72
RH
2504/* Call the helper that computes the ARM LastActiveElement pseudocode
2505 * function, scaled by the element size. This includes the not found
2506 * indication; e.g. not found for esz=3 is -8.
2507 */
2508static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2509{
2510 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2511 * round up, as we do elsewhere, because we need the exact size.
2512 */
2513 TCGv_ptr t_p = tcg_temp_new_ptr();
2acbfbe4 2514 unsigned desc = 0;
ef23cb72 2515
2acbfbe4
RH
2516 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2517 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
ef23cb72
RH
2518
2519 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
ef23cb72 2520
c6a59b55 2521 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
ef23cb72 2522
ef23cb72
RH
2523 tcg_temp_free_ptr(t_p);
2524}
2525
2526/* Increment LAST to the offset of the next element in the vector,
2527 * wrapping around to 0.
2528 */
2529static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2530{
2531 unsigned vsz = vec_full_reg_size(s);
2532
2533 tcg_gen_addi_i32(last, last, 1 << esz);
2534 if (is_power_of_2(vsz)) {
2535 tcg_gen_andi_i32(last, last, vsz - 1);
2536 } else {
4b308bd5
RH
2537 TCGv_i32 max = tcg_constant_i32(vsz);
2538 TCGv_i32 zero = tcg_constant_i32(0);
ef23cb72 2539 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
ef23cb72
RH
2540 }
2541}
2542
2543/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2544static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2545{
2546 unsigned vsz = vec_full_reg_size(s);
2547
2548 if (is_power_of_2(vsz)) {
2549 tcg_gen_andi_i32(last, last, vsz - 1);
2550 } else {
4b308bd5
RH
2551 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
2552 TCGv_i32 zero = tcg_constant_i32(0);
ef23cb72 2553 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
ef23cb72
RH
2554 }
2555}
2556
2557/* Load an unsigned element of ESZ from BASE+OFS. */
2558static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2559{
2560 TCGv_i64 r = tcg_temp_new_i64();
2561
2562 switch (esz) {
2563 case 0:
2564 tcg_gen_ld8u_i64(r, base, ofs);
2565 break;
2566 case 1:
2567 tcg_gen_ld16u_i64(r, base, ofs);
2568 break;
2569 case 2:
2570 tcg_gen_ld32u_i64(r, base, ofs);
2571 break;
2572 case 3:
2573 tcg_gen_ld_i64(r, base, ofs);
2574 break;
2575 default:
2576 g_assert_not_reached();
2577 }
2578 return r;
2579}
2580
2581/* Load an unsigned element of ESZ from RM[LAST]. */
2582static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2583 int rm, int esz)
2584{
2585 TCGv_ptr p = tcg_temp_new_ptr();
2586 TCGv_i64 r;
2587
2588 /* Convert offset into vector into offset into ENV.
2589 * The final adjustment for the vector register base
2590 * is added via constant offset to the load.
2591 */
e03b5686 2592#if HOST_BIG_ENDIAN
ef23cb72
RH
2593 /* Adjust for element ordering. See vec_reg_offset. */
2594 if (esz < 3) {
2595 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2596 }
2597#endif
2598 tcg_gen_ext_i32_ptr(p, last);
2599 tcg_gen_add_ptr(p, p, cpu_env);
2600
2601 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2602 tcg_temp_free_ptr(p);
2603
2604 return r;
2605}
2606
2607/* Compute CLAST for a Zreg. */
2608static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2609{
2610 TCGv_i32 last;
2611 TCGLabel *over;
2612 TCGv_i64 ele;
2613 unsigned vsz, esz = a->esz;
2614
2615 if (!sve_access_check(s)) {
2616 return true;
2617 }
2618
2619 last = tcg_temp_local_new_i32();
2620 over = gen_new_label();
2621
2622 find_last_active(s, last, esz, a->pg);
2623
2624 /* There is of course no movcond for a 2048-bit vector,
2625 * so we must branch over the actual store.
2626 */
2627 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2628
2629 if (!before) {
2630 incr_last_active(s, last, esz);
2631 }
2632
2633 ele = load_last_active(s, last, a->rm, esz);
2634 tcg_temp_free_i32(last);
2635
2636 vsz = vec_full_reg_size(s);
2637 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2638 tcg_temp_free_i64(ele);
2639
2640 /* If this insn used MOVPRFX, we may need a second move. */
2641 if (a->rd != a->rn) {
2642 TCGLabel *done = gen_new_label();
2643 tcg_gen_br(done);
2644
2645 gen_set_label(over);
2646 do_mov_z(s, a->rd, a->rn);
2647
2648 gen_set_label(done);
2649 } else {
2650 gen_set_label(over);
2651 }
2652 return true;
2653}
2654
db7fa5d8
RH
2655TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
2656TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
ef23cb72
RH
2657
2658/* Compute CLAST for a scalar. */
2659static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2660 bool before, TCGv_i64 reg_val)
2661{
2662 TCGv_i32 last = tcg_temp_new_i32();
053552d3 2663 TCGv_i64 ele, cmp;
ef23cb72
RH
2664
2665 find_last_active(s, last, esz, pg);
2666
2667 /* Extend the original value of last prior to incrementing. */
2668 cmp = tcg_temp_new_i64();
2669 tcg_gen_ext_i32_i64(cmp, last);
2670
2671 if (!before) {
2672 incr_last_active(s, last, esz);
2673 }
2674
2675 /* The conceit here is that while last < 0 indicates not found, after
2676 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2677 * from which we can load garbage. We then discard the garbage with
2678 * a conditional move.
2679 */
2680 ele = load_last_active(s, last, rm, esz);
2681 tcg_temp_free_i32(last);
2682
053552d3
RH
2683 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2684 ele, reg_val);
ef23cb72 2685
ef23cb72
RH
2686 tcg_temp_free_i64(cmp);
2687 tcg_temp_free_i64(ele);
2688}
2689
2690/* Compute CLAST for a Vreg. */
2691static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2692{
2693 if (sve_access_check(s)) {
2694 int esz = a->esz;
2695 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2696 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2697
2698 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2699 write_fp_dreg(s, a->rd, reg);
2700 tcg_temp_free_i64(reg);
2701 }
2702 return true;
2703}
2704
ac4fb247
RH
2705TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
2706TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
ef23cb72
RH
2707
2708/* Compute CLAST for a Xreg. */
2709static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2710{
2711 TCGv_i64 reg;
2712
2713 if (!sve_access_check(s)) {
2714 return true;
2715 }
2716
2717 reg = cpu_reg(s, a->rd);
2718 switch (a->esz) {
2719 case 0:
2720 tcg_gen_ext8u_i64(reg, reg);
2721 break;
2722 case 1:
2723 tcg_gen_ext16u_i64(reg, reg);
2724 break;
2725 case 2:
2726 tcg_gen_ext32u_i64(reg, reg);
2727 break;
2728 case 3:
2729 break;
2730 default:
2731 g_assert_not_reached();
2732 }
2733
2734 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2735 return true;
2736}
2737
c673404a
RH
2738TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2739TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
ef23cb72
RH
2740
2741/* Compute LAST for a scalar. */
2742static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2743 int pg, int rm, bool before)
2744{
2745 TCGv_i32 last = tcg_temp_new_i32();
2746 TCGv_i64 ret;
2747
2748 find_last_active(s, last, esz, pg);
2749 if (before) {
2750 wrap_last_active(s, last, esz);
2751 } else {
2752 incr_last_active(s, last, esz);
2753 }
2754
2755 ret = load_last_active(s, last, rm, esz);
2756 tcg_temp_free_i32(last);
2757 return ret;
2758}
2759
2760/* Compute LAST for a Vreg. */
2761static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2762{
2763 if (sve_access_check(s)) {
2764 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2765 write_fp_dreg(s, a->rd, val);
2766 tcg_temp_free_i64(val);
2767 }
2768 return true;
2769}
2770
75de9fd4
RH
2771TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
2772TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
ef23cb72
RH
2773
2774/* Compute LAST for a Xreg. */
2775static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2776{
2777 if (sve_access_check(s)) {
2778 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2779 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2780 tcg_temp_free_i64(val);
2781 }
2782 return true;
2783}
2784
884c5a80
RH
2785TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
2786TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
ef23cb72 2787
3a7be554 2788static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2789{
2790 if (sve_access_check(s)) {
2791 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2792 }
2793 return true;
2794}
2795
3a7be554 2796static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2797{
2798 if (sve_access_check(s)) {
2799 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2800 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2801 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2802 tcg_temp_free_i64(t);
2803 }
2804 return true;
2805}
2806
817bd5c9
RH
2807static gen_helper_gvec_3 * const revb_fns[4] = {
2808 NULL, gen_helper_sve_revb_h,
2809 gen_helper_sve_revb_s, gen_helper_sve_revb_d,
2810};
2811TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
dae8fb90 2812
817bd5c9
RH
2813static gen_helper_gvec_3 * const revh_fns[4] = {
2814 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
2815};
2816TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
dae8fb90 2817
817bd5c9
RH
2818TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
2819 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
dae8fb90 2820
897ebd70
RH
2821TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
2822 gen_helper_sve_splice, a, a->esz)
b48ff240 2823
897ebd70
RH
2824TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
2825 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
75114792 2826
757f9cff
RH
2827/*
2828 *** SVE Integer Compare - Vectors Group
2829 */
2830
2831static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2832 gen_helper_gvec_flags_4 *gen_fn)
2833{
2834 TCGv_ptr pd, zn, zm, pg;
2835 unsigned vsz;
2836 TCGv_i32 t;
2837
2838 if (gen_fn == NULL) {
2839 return false;
2840 }
2841 if (!sve_access_check(s)) {
2842 return true;
2843 }
2844
2845 vsz = vec_full_reg_size(s);
392acacc 2846 t = tcg_temp_new_i32();
757f9cff
RH
2847 pd = tcg_temp_new_ptr();
2848 zn = tcg_temp_new_ptr();
2849 zm = tcg_temp_new_ptr();
2850 pg = tcg_temp_new_ptr();
2851
2852 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2853 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2854 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2855 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2856
392acacc 2857 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
757f9cff
RH
2858
2859 tcg_temp_free_ptr(pd);
2860 tcg_temp_free_ptr(zn);
2861 tcg_temp_free_ptr(zm);
2862 tcg_temp_free_ptr(pg);
2863
2864 do_pred_flags(t);
2865
2866 tcg_temp_free_i32(t);
2867 return true;
2868}
2869
2870#define DO_PPZZ(NAME, name) \
671bdb2e
RH
2871 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \
2872 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2873 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2874 }; \
2875 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \
2876 a, name##_ppzz_fns[a->esz])
757f9cff
RH
2877
2878DO_PPZZ(CMPEQ, cmpeq)
2879DO_PPZZ(CMPNE, cmpne)
2880DO_PPZZ(CMPGT, cmpgt)
2881DO_PPZZ(CMPGE, cmpge)
2882DO_PPZZ(CMPHI, cmphi)
2883DO_PPZZ(CMPHS, cmphs)
2884
2885#undef DO_PPZZ
2886
2887#define DO_PPZW(NAME, name) \
671bdb2e
RH
2888 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \
2889 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2890 gen_helper_sve_##name##_ppzw_s, NULL \
2891 }; \
2892 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \
2893 a, name##_ppzw_fns[a->esz])
757f9cff
RH
2894
2895DO_PPZW(CMPEQ, cmpeq)
2896DO_PPZW(CMPNE, cmpne)
2897DO_PPZW(CMPGT, cmpgt)
2898DO_PPZW(CMPGE, cmpge)
2899DO_PPZW(CMPHI, cmphi)
2900DO_PPZW(CMPHS, cmphs)
2901DO_PPZW(CMPLT, cmplt)
2902DO_PPZW(CMPLE, cmple)
2903DO_PPZW(CMPLO, cmplo)
2904DO_PPZW(CMPLS, cmpls)
2905
2906#undef DO_PPZW
2907
38cadeba
RH
2908/*
2909 *** SVE Integer Compare - Immediate Groups
2910 */
2911
2912static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2913 gen_helper_gvec_flags_3 *gen_fn)
2914{
2915 TCGv_ptr pd, zn, pg;
2916 unsigned vsz;
2917 TCGv_i32 t;
2918
2919 if (gen_fn == NULL) {
2920 return false;
2921 }
2922 if (!sve_access_check(s)) {
2923 return true;
2924 }
2925
2926 vsz = vec_full_reg_size(s);
392acacc 2927 t = tcg_temp_new_i32();
38cadeba
RH
2928 pd = tcg_temp_new_ptr();
2929 zn = tcg_temp_new_ptr();
2930 pg = tcg_temp_new_ptr();
2931
2932 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2933 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2934 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2935
392acacc 2936 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
38cadeba
RH
2937
2938 tcg_temp_free_ptr(pd);
2939 tcg_temp_free_ptr(zn);
2940 tcg_temp_free_ptr(pg);
2941
2942 do_pred_flags(t);
2943
2944 tcg_temp_free_i32(t);
2945 return true;
2946}
2947
2948#define DO_PPZI(NAME, name) \
9c545be6 2949 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \
38cadeba
RH
2950 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2951 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2952 }; \
9c545be6
RH
2953 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \
2954 name##_ppzi_fns[a->esz])
38cadeba
RH
2955
2956DO_PPZI(CMPEQ, cmpeq)
2957DO_PPZI(CMPNE, cmpne)
2958DO_PPZI(CMPGT, cmpgt)
2959DO_PPZI(CMPGE, cmpge)
2960DO_PPZI(CMPHI, cmphi)
2961DO_PPZI(CMPHS, cmphs)
2962DO_PPZI(CMPLT, cmplt)
2963DO_PPZI(CMPLE, cmple)
2964DO_PPZI(CMPLO, cmplo)
2965DO_PPZI(CMPLS, cmpls)
2966
2967#undef DO_PPZI
2968
35da316f
RH
2969/*
2970 *** SVE Partition Break Group
2971 */
2972
2973static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2974 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2975{
2976 if (!sve_access_check(s)) {
2977 return true;
2978 }
2979
2980 unsigned vsz = pred_full_reg_size(s);
2981
2982 /* Predicate sizes may be smaller and cannot use simd_desc. */
2983 TCGv_ptr d = tcg_temp_new_ptr();
2984 TCGv_ptr n = tcg_temp_new_ptr();
2985 TCGv_ptr m = tcg_temp_new_ptr();
2986 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 2987 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
2988
2989 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2990 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2991 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2992 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2993
2994 if (a->s) {
93418f1c
RH
2995 TCGv_i32 t = tcg_temp_new_i32();
2996 fn_s(t, d, n, m, g, desc);
35da316f 2997 do_pred_flags(t);
93418f1c 2998 tcg_temp_free_i32(t);
35da316f 2999 } else {
93418f1c 3000 fn(d, n, m, g, desc);
35da316f
RH
3001 }
3002 tcg_temp_free_ptr(d);
3003 tcg_temp_free_ptr(n);
3004 tcg_temp_free_ptr(m);
3005 tcg_temp_free_ptr(g);
35da316f
RH
3006 return true;
3007}
3008
3009static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3010 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3011{
3012 if (!sve_access_check(s)) {
3013 return true;
3014 }
3015
3016 unsigned vsz = pred_full_reg_size(s);
3017
3018 /* Predicate sizes may be smaller and cannot use simd_desc. */
3019 TCGv_ptr d = tcg_temp_new_ptr();
3020 TCGv_ptr n = tcg_temp_new_ptr();
3021 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 3022 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3023
3024 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3025 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3026 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3027
3028 if (a->s) {
93418f1c
RH
3029 TCGv_i32 t = tcg_temp_new_i32();
3030 fn_s(t, d, n, g, desc);
35da316f 3031 do_pred_flags(t);
93418f1c 3032 tcg_temp_free_i32(t);
35da316f 3033 } else {
93418f1c 3034 fn(d, n, g, desc);
35da316f
RH
3035 }
3036 tcg_temp_free_ptr(d);
3037 tcg_temp_free_ptr(n);
3038 tcg_temp_free_ptr(g);
35da316f
RH
3039 return true;
3040}
3041
2224d24d
RH
3042TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
3043 gen_helper_sve_brkpa, gen_helper_sve_brkpas)
3044TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
3045 gen_helper_sve_brkpb, gen_helper_sve_brkpbs)
3046
3047TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
3048 gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
3049TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
3050 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)
3051
3052TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
3053 gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
3054TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
3055 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)
3056
3057TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
3058 gen_helper_sve_brkn, gen_helper_sve_brkns)
35da316f 3059
9ee3a611
RH
3060/*
3061 *** SVE Predicate Count Group
3062 */
3063
3064static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3065{
3066 unsigned psz = pred_full_reg_size(s);
3067
3068 if (psz <= 8) {
3069 uint64_t psz_mask;
3070
3071 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3072 if (pn != pg) {
3073 TCGv_i64 g = tcg_temp_new_i64();
3074 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3075 tcg_gen_and_i64(val, val, g);
3076 tcg_temp_free_i64(g);
3077 }
3078
3079 /* Reduce the pred_esz_masks value simply to reduce the
3080 * size of the code generated here.
3081 */
3082 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3083 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3084
3085 tcg_gen_ctpop_i64(val, val);
3086 } else {
3087 TCGv_ptr t_pn = tcg_temp_new_ptr();
3088 TCGv_ptr t_pg = tcg_temp_new_ptr();
f556a201 3089 unsigned desc = 0;
9ee3a611 3090
f556a201
RH
3091 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
3092 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
9ee3a611
RH
3093
3094 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3095 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
9ee3a611 3096
c6a59b55 3097 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
9ee3a611
RH
3098 tcg_temp_free_ptr(t_pn);
3099 tcg_temp_free_ptr(t_pg);
9ee3a611
RH
3100 }
3101}
3102
3a7be554 3103static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
3104{
3105 if (sve_access_check(s)) {
3106 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3107 }
3108 return true;
3109}
3110
3a7be554 3111static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3112{
3113 if (sve_access_check(s)) {
3114 TCGv_i64 reg = cpu_reg(s, a->rd);
3115 TCGv_i64 val = tcg_temp_new_i64();
3116
3117 do_cntp(s, val, a->esz, a->pg, a->pg);
3118 if (a->d) {
3119 tcg_gen_sub_i64(reg, reg, val);
3120 } else {
3121 tcg_gen_add_i64(reg, reg, val);
3122 }
3123 tcg_temp_free_i64(val);
3124 }
3125 return true;
3126}
3127
3a7be554 3128static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3129{
3130 if (a->esz == 0) {
3131 return false;
3132 }
3133 if (sve_access_check(s)) {
3134 unsigned vsz = vec_full_reg_size(s);
3135 TCGv_i64 val = tcg_temp_new_i64();
3136 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3137
3138 do_cntp(s, val, a->esz, a->pg, a->pg);
3139 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3140 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3141 }
3142 return true;
3143}
3144
3a7be554 3145static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3146{
3147 if (sve_access_check(s)) {
3148 TCGv_i64 reg = cpu_reg(s, a->rd);
3149 TCGv_i64 val = tcg_temp_new_i64();
3150
3151 do_cntp(s, val, a->esz, a->pg, a->pg);
3152 do_sat_addsub_32(reg, val, a->u, a->d);
3153 }
3154 return true;
3155}
3156
3a7be554 3157static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3158{
3159 if (sve_access_check(s)) {
3160 TCGv_i64 reg = cpu_reg(s, a->rd);
3161 TCGv_i64 val = tcg_temp_new_i64();
3162
3163 do_cntp(s, val, a->esz, a->pg, a->pg);
3164 do_sat_addsub_64(reg, val, a->u, a->d);
3165 }
3166 return true;
3167}
3168
3a7be554 3169static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3170{
3171 if (a->esz == 0) {
3172 return false;
3173 }
3174 if (sve_access_check(s)) {
3175 TCGv_i64 val = tcg_temp_new_i64();
3176 do_cntp(s, val, a->esz, a->pg, a->pg);
3177 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3178 }
3179 return true;
3180}
3181
caf1cefc
RH
3182/*
3183 *** SVE Integer Compare Scalars Group
3184 */
3185
3a7be554 3186static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3187{
3188 if (!sve_access_check(s)) {
3189 return true;
3190 }
3191
3192 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3193 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3194 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3195 TCGv_i64 cmp = tcg_temp_new_i64();
3196
3197 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3198 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3199 tcg_temp_free_i64(cmp);
3200
3201 /* VF = !NF & !CF. */
3202 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3203 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3204
3205 /* Both NF and VF actually look at bit 31. */
3206 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3207 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3208 return true;
3209}
3210
3a7be554 3211static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
caf1cefc 3212{
bbd0968c 3213 TCGv_i64 op0, op1, t0, t1, tmax;
4481bbf2 3214 TCGv_i32 t2;
caf1cefc 3215 TCGv_ptr ptr;
e610906c
RH
3216 unsigned vsz = vec_full_reg_size(s);
3217 unsigned desc = 0;
caf1cefc 3218 TCGCond cond;
34688dbc
RH
3219 uint64_t maxval;
3220 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
3221 bool eq = a->eq == a->lt;
caf1cefc 3222
34688dbc
RH
3223 /* The greater-than conditions are all SVE2. */
3224 if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
3225 return false;
3226 }
bbd0968c
RH
3227 if (!sve_access_check(s)) {
3228 return true;
3229 }
3230
3231 op0 = read_cpu_reg(s, a->rn, 1);
3232 op1 = read_cpu_reg(s, a->rm, 1);
3233
caf1cefc
RH
3234 if (!a->sf) {
3235 if (a->u) {
3236 tcg_gen_ext32u_i64(op0, op0);
3237 tcg_gen_ext32u_i64(op1, op1);
3238 } else {
3239 tcg_gen_ext32s_i64(op0, op0);
3240 tcg_gen_ext32s_i64(op1, op1);
3241 }
3242 }
3243
3244 /* For the helper, compress the different conditions into a computation
3245 * of how many iterations for which the condition is true.
caf1cefc 3246 */
bbd0968c
RH
3247 t0 = tcg_temp_new_i64();
3248 t1 = tcg_temp_new_i64();
34688dbc
RH
3249
3250 if (a->lt) {
3251 tcg_gen_sub_i64(t0, op1, op0);
3252 if (a->u) {
3253 maxval = a->sf ? UINT64_MAX : UINT32_MAX;
3254 cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
3255 } else {
3256 maxval = a->sf ? INT64_MAX : INT32_MAX;
3257 cond = eq ? TCG_COND_LE : TCG_COND_LT;
3258 }
3259 } else {
3260 tcg_gen_sub_i64(t0, op0, op1);
3261 if (a->u) {
3262 maxval = 0;
3263 cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
3264 } else {
3265 maxval = a->sf ? INT64_MIN : INT32_MIN;
3266 cond = eq ? TCG_COND_GE : TCG_COND_GT;
3267 }
3268 }
caf1cefc 3269
4481bbf2 3270 tmax = tcg_constant_i64(vsz >> a->esz);
34688dbc 3271 if (eq) {
caf1cefc
RH
3272 /* Equality means one more iteration. */
3273 tcg_gen_addi_i64(t0, t0, 1);
bbd0968c 3274
34688dbc
RH
3275 /*
3276 * For the less-than while, if op1 is maxval (and the only time
3277 * the addition above could overflow), then we produce an all-true
3278 * predicate by setting the count to the vector length. This is
3279 * because the pseudocode is described as an increment + compare
3280 * loop, and the maximum integer would always compare true.
3281 * Similarly, the greater-than while has the same issue with the
3282 * minimum integer due to the decrement + compare loop.
bbd0968c 3283 */
34688dbc 3284 tcg_gen_movi_i64(t1, maxval);
bbd0968c 3285 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
caf1cefc
RH
3286 }
3287
bbd0968c
RH
3288 /* Bound to the maximum. */
3289 tcg_gen_umin_i64(t0, t0, tmax);
bbd0968c
RH
3290
3291 /* Set the count to zero if the condition is false. */
caf1cefc
RH
3292 tcg_gen_movi_i64(t1, 0);
3293 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
bbd0968c 3294 tcg_temp_free_i64(t1);
caf1cefc 3295
bbd0968c 3296 /* Since we're bounded, pass as a 32-bit type. */
caf1cefc
RH
3297 t2 = tcg_temp_new_i32();
3298 tcg_gen_extrl_i64_i32(t2, t0);
3299 tcg_temp_free_i64(t0);
bbd0968c
RH
3300
3301 /* Scale elements to bits. */
3302 tcg_gen_shli_i32(t2, t2, a->esz);
caf1cefc 3303
e610906c
RH
3304 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3305 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
caf1cefc
RH
3306
3307 ptr = tcg_temp_new_ptr();
3308 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3309
34688dbc 3310 if (a->lt) {
4481bbf2 3311 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
34688dbc 3312 } else {
4481bbf2 3313 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
34688dbc 3314 }
caf1cefc
RH
3315 do_pred_flags(t2);
3316
3317 tcg_temp_free_ptr(ptr);
3318 tcg_temp_free_i32(t2);
caf1cefc
RH
3319 return true;
3320}
3321
14f6dad1
RH
3322static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3323{
3324 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3325 TCGv_i32 t2;
14f6dad1
RH
3326 TCGv_ptr ptr;
3327 unsigned vsz = vec_full_reg_size(s);
3328 unsigned desc = 0;
3329
3330 if (!dc_isar_feature(aa64_sve2, s)) {
3331 return false;
3332 }
3333 if (!sve_access_check(s)) {
3334 return true;
3335 }
3336
3337 op0 = read_cpu_reg(s, a->rn, 1);
3338 op1 = read_cpu_reg(s, a->rm, 1);
3339
4481bbf2 3340 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3341 diff = tcg_temp_new_i64();
3342
3343 if (a->rw) {
3344 /* WHILERW */
3345 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3346 t1 = tcg_temp_new_i64();
3347 tcg_gen_sub_i64(diff, op0, op1);
3348 tcg_gen_sub_i64(t1, op1, op0);
3349 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3350 tcg_temp_free_i64(t1);
3351 /* Round down to a multiple of ESIZE. */
3352 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3353 /* If op1 == op0, diff == 0, and the condition is always true. */
3354 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3355 } else {
3356 /* WHILEWR */
3357 tcg_gen_sub_i64(diff, op1, op0);
3358 /* Round down to a multiple of ESIZE. */
3359 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3360 /* If op0 >= op1, diff <= 0, the condition is always true. */
3361 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3362 }
3363
3364 /* Bound to the maximum. */
3365 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3366
3367 /* Since we're bounded, pass as a 32-bit type. */
3368 t2 = tcg_temp_new_i32();
3369 tcg_gen_extrl_i64_i32(t2, diff);
3370 tcg_temp_free_i64(diff);
3371
3372 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3373 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3374
3375 ptr = tcg_temp_new_ptr();
3376 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3377
4481bbf2 3378 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3379 do_pred_flags(t2);
3380
3381 tcg_temp_free_ptr(ptr);
3382 tcg_temp_free_i32(t2);
14f6dad1
RH
3383 return true;
3384}
3385
ed491961
RH
3386/*
3387 *** SVE Integer Wide Immediate - Unpredicated Group
3388 */
3389
3a7be554 3390static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3391{
3392 if (a->esz == 0) {
3393 return false;
3394 }
3395 if (sve_access_check(s)) {
3396 unsigned vsz = vec_full_reg_size(s);
3397 int dofs = vec_full_reg_offset(s, a->rd);
3398 uint64_t imm;
3399
3400 /* Decode the VFP immediate. */
3401 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3402 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3403 }
3404 return true;
3405}
3406
3a7be554 3407static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3408{
ed491961
RH
3409 if (sve_access_check(s)) {
3410 unsigned vsz = vec_full_reg_size(s);
3411 int dofs = vec_full_reg_offset(s, a->rd);
8711e71f 3412 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3413 }
3414 return true;
3415}
3416
48ca613d 3417TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)
6e6a157d 3418
3a7be554 3419static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3420{
3421 a->imm = -a->imm;
3a7be554 3422 return trans_ADD_zzi(s, a);
6e6a157d
RH
3423}
3424
3a7be554 3425static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3426{
53229a77 3427 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
6e6a157d
RH
3428 static const GVecGen2s op[4] = {
3429 { .fni8 = tcg_gen_vec_sub8_i64,
3430 .fniv = tcg_gen_sub_vec,
3431 .fno = gen_helper_sve_subri_b,
53229a77 3432 .opt_opc = vecop_list,
6e6a157d
RH
3433 .vece = MO_8,
3434 .scalar_first = true },
3435 { .fni8 = tcg_gen_vec_sub16_i64,
3436 .fniv = tcg_gen_sub_vec,
3437 .fno = gen_helper_sve_subri_h,
53229a77 3438 .opt_opc = vecop_list,
6e6a157d
RH
3439 .vece = MO_16,
3440 .scalar_first = true },
3441 { .fni4 = tcg_gen_sub_i32,
3442 .fniv = tcg_gen_sub_vec,
3443 .fno = gen_helper_sve_subri_s,
53229a77 3444 .opt_opc = vecop_list,
6e6a157d
RH
3445 .vece = MO_32,
3446 .scalar_first = true },
3447 { .fni8 = tcg_gen_sub_i64,
3448 .fniv = tcg_gen_sub_vec,
3449 .fno = gen_helper_sve_subri_d,
53229a77 3450 .opt_opc = vecop_list,
6e6a157d
RH
3451 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3452 .vece = MO_64,
3453 .scalar_first = true }
3454 };
3455
6e6a157d
RH
3456 if (sve_access_check(s)) {
3457 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3458 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3459 vec_full_reg_offset(s, a->rn),
9fff3fcc 3460 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
6e6a157d
RH
3461 }
3462 return true;
3463}
3464
fa4bd72c 3465TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
6e6a157d 3466
3a7be554 3467static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3468{
6e6a157d 3469 if (sve_access_check(s)) {
138a1f7b
RH
3470 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3471 tcg_constant_i64(a->imm), u, d);
6e6a157d
RH
3472 }
3473 return true;
3474}
3475
17b54d1c
RH
3476TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
3477TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
3478TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
3479TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
6e6a157d
RH
3480
3481static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3482{
3483 if (sve_access_check(s)) {
3484 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3485 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3486 vec_full_reg_offset(s, a->rn),
138a1f7b 3487 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
6e6a157d
RH
3488 }
3489 return true;
3490}
3491
3492#define DO_ZZI(NAME, name) \
ef4a3958 3493 static gen_helper_gvec_2i * const name##i_fns[4] = { \
6e6a157d
RH
3494 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3495 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3496 }; \
ef4a3958 3497 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])
6e6a157d
RH
3498
3499DO_ZZI(SMAX, smax)
3500DO_ZZI(UMAX, umax)
3501DO_ZZI(SMIN, smin)
3502DO_ZZI(UMIN, umin)
3503
3504#undef DO_ZZI
3505
5f425b92
RH
3506static gen_helper_gvec_4 * const dot_fns[2][2] = {
3507 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3508 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3509};
3510TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
3511 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
d730ecaa 3512
814d4c52
RH
3513/*
3514 * SVE Multiply - Indexed
3515 */
3516
f3500a25
RH
3517TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3518 gen_helper_gvec_sdot_idx_b, a)
3519TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3520 gen_helper_gvec_sdot_idx_h, a)
3521TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3522 gen_helper_gvec_udot_idx_b, a)
3523TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3524 gen_helper_gvec_udot_idx_h, a)
3525
3526TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3527 gen_helper_gvec_sudot_idx_b, a)
3528TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3529 gen_helper_gvec_usdot_idx_b, a)
16fcfdc7 3530
814d4c52 3531#define DO_SVE2_RRX(NAME, FUNC) \
af031f64
RH
3532 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3533 a->rd, a->rn, a->rm, a->index)
814d4c52 3534
af031f64
RH
3535DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
3536DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
3537DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)
814d4c52 3538
af031f64
RH
3539DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
3540DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
3541DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
1aee2d70 3542
af031f64
RH
3543DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
3544DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
3545DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
1aee2d70 3546
814d4c52
RH
3547#undef DO_SVE2_RRX
3548
b95f5eeb 3549#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
af031f64
RH
3550 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3551 a->rd, a->rn, a->rm, (a->index << 1) | TOP)
3552
3553DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
3554DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
3555DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
3556DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
3557
3558DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
3559DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
3560DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
3561DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
3562
3563DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
3564DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
3565DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
3566DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
d3949c4c 3567
b95f5eeb
RH
3568#undef DO_SVE2_RRX_TB
3569
8a02aac7 3570#define DO_SVE2_RRXR(NAME, FUNC) \
8681eb76 3571 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)
8a02aac7 3572
8681eb76
RH
3573DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
3574DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
3575DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
8a02aac7 3576
8681eb76
RH
3577DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
3578DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
3579DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
8a02aac7 3580
8681eb76
RH
3581DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
3582DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
3583DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
75d6d5fc 3584
8681eb76
RH
3585DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
3586DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
3587DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
75d6d5fc 3588
8a02aac7
RH
3589#undef DO_SVE2_RRXR
3590
c5c455d7 3591#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
8681eb76
RH
3592 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
3593 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)
3594
3595DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
3596DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
3597DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
3598DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
3599
3600DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
3601DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
3602DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
3603DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
3604
3605DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
3606DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
3607DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
3608DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
3609
3610DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
3611DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
3612DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
3613DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
3614
3615DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
3616DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
3617DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
3618DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
3619
3620DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
3621DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
3622DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
3623DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
c5c455d7
RH
3624
3625#undef DO_SVE2_RRXR_TB
3626
3b787ed8 3627#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
8681eb76
RH
3628 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
3629 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)
3b787ed8
RH
3630
3631DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
3632DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)
3633
3634DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
3635DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)
3636
21068f39
RH
3637DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
3638DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
3639
3b787ed8
RH
3640#undef DO_SVE2_RRXR_ROT
3641
ca40a6e6
RH
3642/*
3643 *** SVE Floating Point Multiply-Add Indexed Group
3644 */
3645
0a82d963 3646static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
ca40a6e6 3647{
41bf9b67
RH
3648 static gen_helper_gvec_4_ptr * const fns[4] = {
3649 NULL,
ca40a6e6
RH
3650 gen_helper_gvec_fmla_idx_h,
3651 gen_helper_gvec_fmla_idx_s,
3652 gen_helper_gvec_fmla_idx_d,
3653 };
41bf9b67
RH
3654 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
3655 (a->index << 1) | sub,
3656 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3657}
3658
3b879c28
RH
3659TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
3660TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
0a82d963 3661
ca40a6e6
RH
3662/*
3663 *** SVE Floating Point Multiply Indexed Group
3664 */
3665
9c99ef66
RH
3666static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
3667 NULL, gen_helper_gvec_fmul_idx_h,
3668 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
3669};
3670TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
3671 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
3672 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
ca40a6e6 3673
23fbe79f
RH
3674/*
3675 *** SVE Floating Point Fast Reduction Group
3676 */
3677
3678typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3679 TCGv_ptr, TCGv_i32);
3680
5ce18efe 3681static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
23fbe79f
RH
3682 gen_helper_fp_reduce *fn)
3683{
5ce18efe
RH
3684 unsigned vsz, p2vsz;
3685 TCGv_i32 t_desc;
23fbe79f
RH
3686 TCGv_ptr t_zn, t_pg, status;
3687 TCGv_i64 temp;
3688
5ce18efe
RH
3689 if (fn == NULL) {
3690 return false;
3691 }
3692 if (!sve_access_check(s)) {
3693 return true;
3694 }
3695
3696 vsz = vec_full_reg_size(s);
3697 p2vsz = pow2ceil(vsz);
3698 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
23fbe79f
RH
3699 temp = tcg_temp_new_i64();
3700 t_zn = tcg_temp_new_ptr();
3701 t_pg = tcg_temp_new_ptr();
3702
3703 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3704 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3705 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
23fbe79f
RH
3706
3707 fn(temp, t_zn, t_pg, status, t_desc);
3708 tcg_temp_free_ptr(t_zn);
3709 tcg_temp_free_ptr(t_pg);
3710 tcg_temp_free_ptr(status);
23fbe79f
RH
3711
3712 write_fp_dreg(s, a->rd, temp);
3713 tcg_temp_free_i64(temp);
5ce18efe 3714 return true;
23fbe79f
RH
3715}
3716
3717#define DO_VPZ(NAME, name) \
8003e7cf
RH
3718 static gen_helper_fp_reduce * const name##_fns[4] = { \
3719 NULL, gen_helper_sve_##name##_h, \
3720 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
23fbe79f 3721 }; \
8003e7cf 3722 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
23fbe79f
RH
3723
3724DO_VPZ(FADDV, faddv)
3725DO_VPZ(FMINNMV, fminnmv)
3726DO_VPZ(FMAXNMV, fmaxnmv)
3727DO_VPZ(FMINV, fminv)
3728DO_VPZ(FMAXV, fmaxv)
3729
8003e7cf
RH
3730#undef DO_VPZ
3731
3887c038
RH
3732/*
3733 *** SVE Floating Point Unary Operations - Unpredicated Group
3734 */
3735
de58c6b0
RH
3736static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
3737 NULL, gen_helper_gvec_frecpe_h,
3738 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
3739};
3740TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
3887c038 3741
de58c6b0
RH
3742static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
3743 NULL, gen_helper_gvec_frsqrte_h,
3744 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
3745};
3746TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
3887c038 3747
4d2e2a03
RH
3748/*
3749 *** SVE Floating Point Compare with Zero Group
3750 */
3751
63d6aef8 3752static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4d2e2a03
RH
3753 gen_helper_gvec_3_ptr *fn)
3754{
63d6aef8
RH
3755 if (fn == NULL) {
3756 return false;
3757 }
3758 if (sve_access_check(s)) {
3759 unsigned vsz = vec_full_reg_size(s);
3760 TCGv_ptr status =
3761 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03 3762
63d6aef8
RH
3763 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3764 vec_full_reg_offset(s, a->rn),
3765 pred_full_reg_offset(s, a->pg),
3766 status, vsz, vsz, 0, fn);
3767 tcg_temp_free_ptr(status);
3768 }
3769 return true;
4d2e2a03
RH
3770}
3771
3772#define DO_PPZ(NAME, name) \
63d6aef8
RH
3773 static gen_helper_gvec_3_ptr * const name##_fns[] = { \
3774 NULL, gen_helper_sve_##name##_h, \
3775 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
4d2e2a03 3776 }; \
63d6aef8 3777 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])
4d2e2a03
RH
3778
3779DO_PPZ(FCMGE_ppz0, fcmge0)
3780DO_PPZ(FCMGT_ppz0, fcmgt0)
3781DO_PPZ(FCMLE_ppz0, fcmle0)
3782DO_PPZ(FCMLT_ppz0, fcmlt0)
3783DO_PPZ(FCMEQ_ppz0, fcmeq0)
3784DO_PPZ(FCMNE_ppz0, fcmne0)
3785
3786#undef DO_PPZ
3787
67fcd9ad
RH
3788/*
3789 *** SVE floating-point trig multiply-add coefficient
3790 */
3791
cdd85923
RH
3792static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
3793 NULL, gen_helper_sve_ftmad_h,
3794 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
3795};
3796TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
3797 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
3798 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
67fcd9ad 3799
7f9ddf64
RH
3800/*
3801 *** SVE Floating Point Accumulating Reduction Group
3802 */
3803
3a7be554 3804static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
3805{
3806 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3807 TCGv_ptr, TCGv_ptr, TCGv_i32);
3808 static fadda_fn * const fns[3] = {
3809 gen_helper_sve_fadda_h,
3810 gen_helper_sve_fadda_s,
3811 gen_helper_sve_fadda_d,
3812 };
3813 unsigned vsz = vec_full_reg_size(s);
3814 TCGv_ptr t_rm, t_pg, t_fpst;
3815 TCGv_i64 t_val;
3816 TCGv_i32 t_desc;
3817
3818 if (a->esz == 0) {
3819 return false;
3820 }
3821 if (!sve_access_check(s)) {
3822 return true;
3823 }
3824
3825 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3826 t_rm = tcg_temp_new_ptr();
3827 t_pg = tcg_temp_new_ptr();
3828 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3829 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3830 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
c6a59b55 3831 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
7f9ddf64
RH
3832
3833 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3834
7f9ddf64
RH
3835 tcg_temp_free_ptr(t_fpst);
3836 tcg_temp_free_ptr(t_pg);
3837 tcg_temp_free_ptr(t_rm);
3838
3839 write_fp_dreg(s, a->rd, t_val);
3840 tcg_temp_free_i64(t_val);
3841 return true;
3842}
3843
29b80469
RH
3844/*
3845 *** SVE Floating Point Arithmetic - Unpredicated Group
3846 */
3847
29b80469 3848#define DO_FP3(NAME, name) \
bdd4ce0d 3849 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
29b80469
RH
3850 NULL, gen_helper_gvec_##name##_h, \
3851 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3852 }; \
bdd4ce0d 3853 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
29b80469
RH
3854
3855DO_FP3(FADD_zzz, fadd)
3856DO_FP3(FSUB_zzz, fsub)
3857DO_FP3(FMUL_zzz, fmul)
3858DO_FP3(FTSMUL, ftsmul)
3859DO_FP3(FRECPS, recps)
3860DO_FP3(FRSQRTS, rsqrts)
3861
3862#undef DO_FP3
3863
ec3b87c2
RH
3864/*
3865 *** SVE Floating Point Arithmetic - Predicated Group
3866 */
3867
7de2617b
RH
3868#define DO_ZPZZ_FP(NAME, FEAT, name) \
3869 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
3870 NULL, gen_helper_##name##_h, \
3871 gen_helper_##name##_s, gen_helper_##name##_d \
3872 }; \
3873 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
3874
3875DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
3876DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
3877DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
3878DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
3879DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
3880DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
3881DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
3882DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
3883DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
3884DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
3885DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
8092c6a3 3886
cc48affe
RH
3887typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3888 TCGv_i64, TCGv_ptr, TCGv_i32);
3889
3890static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3891 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3892{
3893 unsigned vsz = vec_full_reg_size(s);
3894 TCGv_ptr t_zd, t_zn, t_pg, status;
3895 TCGv_i32 desc;
3896
3897 t_zd = tcg_temp_new_ptr();
3898 t_zn = tcg_temp_new_ptr();
3899 t_pg = tcg_temp_new_ptr();
3900 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3901 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3902 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3903
cdfb22bb 3904 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
c6a59b55 3905 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
cc48affe
RH
3906 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3907
cc48affe
RH
3908 tcg_temp_free_ptr(status);
3909 tcg_temp_free_ptr(t_pg);
3910 tcg_temp_free_ptr(t_zn);
3911 tcg_temp_free_ptr(t_zd);
3912}
3913
413ee8e4 3914static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
cc48affe
RH
3915 gen_helper_sve_fp2scalar *fn)
3916{
413ee8e4
RH
3917 if (fn == NULL) {
3918 return false;
3919 }
3920 if (sve_access_check(s)) {
3921 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
3922 tcg_constant_i64(imm), fn);
3923 }
3924 return true;
cc48affe
RH
3925}
3926
/*
 * FP arithmetic with an implied constant operand.  The 1-bit 'imm'
 * field of the encoding selects between two constants (e.g. 0.5 vs
 * 1.0), looked up per element size below.  Element size 0 (bytes) is
 * invalid: its helper slot is NULL, which do_fp_imm rejects.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
    static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d \
    }; \
    static uint64_t const name##_const[4][2] = { \
        { -1, -1 }, /* esz == 0 is invalid; value unused */ \
        { float16_##const0, float16_##const1 }, \
        { float32_##const0, float32_##const1 }, \
        { float64_##const0, float64_##const1 }, \
    }; \
    TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
               name##_const[a->esz][a->imm], name##_fns[a->esz])

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
3952
abfdefd5
RH
3953static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3954 gen_helper_gvec_4_ptr *fn)
3955{
3956 if (fn == NULL) {
3957 return false;
3958 }
3959 if (sve_access_check(s)) {
3960 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3961 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
3962 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3963 vec_full_reg_offset(s, a->rn),
3964 vec_full_reg_offset(s, a->rm),
3965 pred_full_reg_offset(s, a->pg),
3966 status, vsz, vsz, 0, fn);
3967 tcg_temp_free_ptr(status);
3968 }
3969 return true;
3970}
3971
/*
 * Predicated FP compare group.  Byte elements are invalid for FP,
 * hence the NULL in slot 0 of each helper table.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_fp_cmp(s, a, fns[a->esz]); \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
3991
6f5cd670
RH
/*
 * FCADD: FP complex add with rotate.  The rotation is passed to the
 * helper in the simd data field; FP16 data uses the FP16 status flags.
 */
static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
    NULL, gen_helper_sve_fcadd_h,
    gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
};
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
           a->rd, a->rn, a->rm, a->pg, a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
76a9d9cd 3999
/*
 * Predicated FP multiply-add group (Zd = Za +/- Zn * Zm and the
 * negated forms).  Each variant has its own set of helpers; the
 * simd data field is unused (0).
 */
#define DO_FMLA(NAME, name) \
    static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
               a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
               a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4015
498be5b8
RH
/*
 * FCMLA (vectors): FP complex multiply-add; the rotation is passed
 * to the helper in the simd data field.
 */
static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
    NULL, gen_helper_sve_fcmla_zpzzz_h,
    gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
};
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
           a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
05f48bab 4023
3a7be554 4024static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405 4025{
41bf9b67
RH
4026 static gen_helper_gvec_4_ptr * const fns[4] = {
4027 NULL,
18fc2405
RH
4028 gen_helper_gvec_fcmlah_idx,
4029 gen_helper_gvec_fcmlas_idx,
41bf9b67 4030 NULL,
18fc2405
RH
4031 };
4032
18fc2405 4033 tcg_debug_assert(a->rd == a->ra);
41bf9b67
RH
4034
4035 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
4036 a->index * 4 + a->rot,
4037 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
18fc2405
RH
4038}
4039
8092c6a3
RH
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */

/* Conversions between FP precisions. */
TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)

/* BFCVT is gated on the SVE BF16 extension. */
TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)

/*
 * FP -> integer conversions (round toward zero).
 * Half-precision sources use the FP16 status flags.
 */
TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)

TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
df4de1af 4091
ed6bb6b4
RH
/*
 * FP round to integral value, using the current rounding mode (FRINTI).
 * The same helper table is reused below by the explicit-rounding-mode
 * FRINT* variants via do_frint_mode.
 */
static gen_helper_gvec_3_ptr * const frint_fns[] = {
    NULL,
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
cda3c753 4100
0360730c
RH
4101static gen_helper_gvec_3_ptr * const frintx_fns[] = {
4102 NULL,
4103 gen_helper_sve_frintx_h,
4104 gen_helper_sve_frintx_s,
4105 gen_helper_sve_frintx_d
4106};
4107TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
4108 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753 4109
95365277
SL
/*
 * Expand an FP round-to-integral insn with an explicit rounding MODE,
 * temporarily overriding the mode in the FP status.  Returns false
 * when no helper exists for the element size.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz;
    TCGv_i32 tmode;
    TCGv_ptr status;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    tmode = tcg_const_i32(mode);
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    /* Install MODE, capturing the previous rounding mode in tmode. */
    gen_helper_set_rmode(tmode, tmode, status);

    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);

    /* Restore the original rounding mode saved above. */
    gen_helper_set_rmode(tmode, tmode, status);
    tcg_temp_free_i32(tmode);
    tcg_temp_free_ptr(status);
    return true;
}
4140
27645836
RH
/* FRINT* with an explicit rounding mode, reusing frint_fns above. */
TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
           float_round_nearest_even, frint_fns[a->esz])
TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
           float_round_up, frint_fns[a->esz])
TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
           float_round_down, frint_fns[a->esz])
TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
           float_round_to_zero, frint_fns[a->esz])
TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
           float_round_ties_away, frint_fns[a->esz])
cda3c753 4151
0360730c
RH
/* FRECPX: FP reciprocal exponent. */
static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
    NULL, gen_helper_sve_frecpx_h,
    gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
};
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
8092c6a3 4158
0360730c
RH
/* FSQRT: FP square root. */
static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
    NULL, gen_helper_sve_fsqrt_h,
    gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/*
 * Signed integer -> FP conversions.
 * Half-precision results use the FP16 status flags.
 */
TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)

/* Unsigned integer -> FP conversions. */
TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
8092c6a3 4199
d1822297
RH
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte loads, counting the sub-8-byte tail as one part. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    /* Compute Rn + IMM and apply the MTE check across the whole range. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Few enough parts: unroll the 8-byte loads inline. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Otherwise emit a run-time loop over the aligned portion. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two tail: a single load of the exact width. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: a 4-byte load followed by a 2-byte load. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
4294
/* Similarly for stores: write LEN bytes starting at VOFS to Rn + IMM. */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte stores, counting the sub-8-byte tail as one part. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    /* Compute Rn + IMM and apply the MTE check across the whole range. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Few enough parts: unroll the 8-byte stores inline. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Otherwise emit a run-time loop over the aligned portion. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two tail: a single store of the exact width. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: a 4-byte store followed by a 2-byte store. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
4379
3a7be554 4380static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4381{
4382 if (sve_access_check(s)) {
4383 int size = vec_full_reg_size(s);
4384 int off = vec_full_reg_offset(s, a->rd);
4385 do_ldr(s, off, size, a->rn, a->imm * size);
4386 }
4387 return true;
4388}
4389
3a7be554 4390static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4391{
4392 if (sve_access_check(s)) {
4393 int size = pred_full_reg_size(s);
4394 int off = pred_full_reg_offset(s, a->rd);
4395 do_ldr(s, off, size, a->rn, a->imm * size);
4396 }
4397 return true;
4398}
c4e7c493 4399
3a7be554 4400static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4401{
4402 if (sve_access_check(s)) {
4403 int size = vec_full_reg_size(s);
4404 int off = vec_full_reg_offset(s, a->rd);
4405 do_str(s, off, size, a->rn, a->imm * size);
4406 }
4407 return true;
4408}
4409
3a7be554 4410static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4411{
4412 if (sve_access_check(s)) {
4413 int size = pred_full_reg_size(s);
4414 int off = pred_full_reg_offset(s, a->rd);
4415 do_str(s, off, size, a->rn, a->imm * size);
4416 }
4417 return true;
4418}
4419
c4e7c493
RH
/*
 *** SVE Memory - Contiguous Load Group
 */

/*
 * The memory mode of the dtype, indexed by the 4-bit dtype field of
 * the insn.  Signed MemOps mark sign-extending loads (element size
 * wider than the memory size -- see dtype_esz below).
 */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

/* The memory size (log2 bytes) of the dtype. */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)

/* The vector element size (log2 bytes) of dtype. */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4441
/*
 * Expand a predicated contiguous load/store of MTE_N registers via
 * out-of-line helper FN.  ADDR is the (dirty) base address; DTYPE
 * selects the memory/element sizes; IS_WRITE distinguishes stores
 * for the MTE descriptor.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        /*
         * Pack the MTE check parameters into the upper bits of the
         * descriptor; the helper performs the tag check itself.
         */
        int msz = dtype_msz(dtype);

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        /* No MTE: strip the tag bits from the address up front. */
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_pg);
}
4476
c182c6db
RH
/*
 * Contiguous load helpers, indexed by [mte][be][dtype][nreg].
 * NULL entries are dtype/nreg combinations with no encoding
 * (multi-register forms exist only for same-size loads).
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
4595
c4e7c493
RH
4596static void do_ld_zpa(DisasContext *s, int zt, int pg,
4597 TCGv_i64 addr, int dtype, int nreg)
4598{
206adacf 4599 gen_helper_gvec_mem *fn
c182c6db 4600 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 4601
206adacf
RH
4602 /*
4603 * While there are holes in the table, they are not
c4e7c493
RH
4604 * accessible via the instruction encoding.
4605 */
4606 assert(fn != NULL);
206adacf 4607 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
4608}
4609
3a7be554 4610static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
4611{
4612 if (a->rm == 31) {
4613 return false;
4614 }
4615 if (sve_access_check(s)) {
4616 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4617 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4618 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4619 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4620 }
4621 return true;
4622}
4623
3a7be554 4624static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
4625{
4626 if (sve_access_check(s)) {
4627 int vsz = vec_full_reg_size(s);
4628 int elements = vsz >> dtype_esz[a->dtype];
4629 TCGv_i64 addr = new_tmp_a64(s);
4630
4631 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4632 (a->imm * elements * (a->nreg + 1))
4633 << dtype_msz(a->dtype));
4634 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4635 }
4636 return true;
4637}
e2654d75 4638
/*
 * LDFF1: contiguous first-fault load, scalar + scalar.
 * Helpers indexed by [mte][be][dtype]; first-fault has no
 * multi-register forms.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        /* addr = Rn + (Rm << msz) */
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4736
/*
 * LDNF1: contiguous non-fault load, scalar + immediate.
 * Helpers indexed by [mte][be][dtype]; non-fault has no
 * multi-register forms.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
1a039c7e 4837
/*
 * LD1RQ: load one 128-bit quadword under predicate control and
 * replicate it to fill the whole of vector register zt.
 * dtype selects element size and extension, as for the contiguous loads.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers. */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /* On a big-endian host the low predicate bits sit at the
           high-address end of the 8-byte unit. */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        /* Stage the reduced predicate in the scratch slot in CPUARMState. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* Call the LD1 helper with oprsz/maxsz fixed to exactly 16 bytes. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    tcg_temp_free_ptr(t_pg);

    /* Replicate that first quadword across the rest of the vector. */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}
3a7be554 4880static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
4881{
4882 if (a->rm == 31) {
4883 return false;
4884 }
4885 if (sve_access_check(s)) {
4886 int msz = dtype_msz(a->dtype);
4887 TCGv_i64 addr = new_tmp_a64(s);
4888 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4889 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 4890 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
4891 }
4892 return true;
4893}
4894
3a7be554 4895static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
4896{
4897 if (sve_access_check(s)) {
4898 TCGv_i64 addr = new_tmp_a64(s);
4899 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 4900 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
4901 }
4902 return true;
4903}
4904
/*
 * LD1RO (F64MM): load one 256-bit octaword under predicate control and
 * replicate it through vector register zt in 32-byte units.  Any final
 * partial unit (when VL is not a multiple of 32) is zeroed.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers. */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        /* Low predicate bits are at the high-address end on BE hosts. */
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        /* Stage the reduced predicate in the scratch slot in CPUARMState. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* Call the LD1 helper with oprsz/maxsz fixed to exactly 32 bytes. */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}
4968static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
4969{
4970 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
4971 return false;
4972 }
4973 if (a->rm == 31) {
4974 return false;
4975 }
4976 if (sve_access_check(s)) {
4977 TCGv_i64 addr = new_tmp_a64(s);
4978 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4979 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4980 do_ldro(s, a->rd, a->pg, addr, a->dtype);
4981 }
4982 return true;
4983}
4984
4985static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
4986{
4987 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
4988 return false;
4989 }
4990 if (sve_access_check(s)) {
4991 TCGv_i64 addr = new_tmp_a64(s);
4992 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
4993 do_ldro(s, a->rd, a->pg, addr, a->dtype);
4994 }
4995 return true;
4996}
4997
/* Load and broadcast element (LD1R): load one element and replicate it
 * to every element of zd, with inactive elements then zeroed. */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Label to skip the load when the predicate is all-false. */
    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Wide predicate: branch over the load if no element is active. */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    /* MTE check for a single access of size 1 << msz. */
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
/*
 * Emit a predicated contiguous store (ST1..ST4).
 * zt: first register to store; pg: governing predicate; addr: base address;
 * msz: log2 memory element size; esz: log2 register element size;
 * nreg: 0 for ST1, else 1..3 selecting ST2..ST4.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* Indexed by [mte][be][msz][esz]; NULL where msz > esz is invalid. */
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    /* Indexed by [mte][be][nreg - 1][msz]; msz == esz for ST2..ST4. */
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
3a7be554 5174static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5175{
5176 if (a->rm == 31 || a->msz > a->esz) {
5177 return false;
5178 }
5179 if (sve_access_check(s)) {
5180 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5181 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5182 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5183 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5184 }
5185 return true;
5186}
5187
3a7be554 5188static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5189{
5190 if (a->msz > a->esz) {
5191 return false;
5192 }
5193 if (sve_access_check(s)) {
5194 int vsz = vec_full_reg_size(s);
5195 int elements = vsz >> a->esz;
5196 TCGv_i64 addr = new_tmp_a64(s);
5197
5198 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5199 (a->imm * elements * (a->nreg + 1)) << a->msz);
5200 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5201 }
5202 return true;
5203}
f6dbf62a
RH
5204
5205/*
5206 *** SVE gather loads / scatter stores
5207 */
5208
/*
 * Emit a gather load or scatter store.
 * zt: data vector; pg: governing predicate; zm: vector of offsets;
 * scale: log2 scaling of the offsets, packed into the descriptor;
 * scalar: scalar base address (or a preloaded immediate — see callers);
 * msz: log2 memory element size; is_write: store vs load (for MTE).
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    int desc = 0;

    /* Pack the MTE fields into the high part of the simd_data field. */
    if (s->mte_active[0]) {
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    }
    desc = simd_desc(vsz, vsz, desc | scale);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
}
/*
 * Indexed by [mte][be][ff][xs][u][msz]:
 * ff = first-fault, xs = offset sign-extension, u = unsigned extend,
 * msz = log2 memory element size.  NULL where msz/extension is invalid.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
/* Note that we overload xs=2 to indicate 64-bit offset. */
/* Indexed by [mte][be][ff][xs][u][msz], as for gather_load_fn32. */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
3a7be554 5567static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5568{
5569 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5570 bool be = s->be_data == MO_BE;
5571 bool mte = s->mte_active[0];
673e9fa6
RH
5572
5573 if (!sve_access_check(s)) {
5574 return true;
5575 }
5576
5577 switch (a->esz) {
5578 case MO_32:
d28d12f0 5579 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5580 break;
5581 case MO_64:
d28d12f0 5582 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5583 break;
5584 }
5585 assert(fn != NULL);
5586
5587 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5588 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5589 return true;
5590}
5591
3a7be554 5592static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5593{
5594 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5595 bool be = s->be_data == MO_BE;
5596 bool mte = s->mte_active[0];
673e9fa6
RH
5597
5598 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5599 return false;
5600 }
5601 if (!sve_access_check(s)) {
5602 return true;
5603 }
5604
5605 switch (a->esz) {
5606 case MO_32:
d28d12f0 5607 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5608 break;
5609 case MO_64:
d28d12f0 5610 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5611 break;
5612 }
5613 assert(fn != NULL);
5614
5615 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5616 * by loading the immediate into the scalar parameter.
5617 */
2ccdf94f
RH
5618 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5619 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
5620 return true;
5621}
5622
cf327449
SL
5623static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
5624{
b17ab470
RH
5625 gen_helper_gvec_mem_scatter *fn = NULL;
5626 bool be = s->be_data == MO_BE;
5627 bool mte = s->mte_active[0];
5628
5629 if (a->esz < a->msz + !a->u) {
5630 return false;
5631 }
cf327449
SL
5632 if (!dc_isar_feature(aa64_sve2, s)) {
5633 return false;
5634 }
b17ab470
RH
5635 if (!sve_access_check(s)) {
5636 return true;
5637 }
5638
5639 switch (a->esz) {
5640 case MO_32:
5641 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
5642 break;
5643 case MO_64:
5644 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
5645 break;
5646 }
5647 assert(fn != NULL);
5648
5649 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5650 cpu_reg(s, a->rm), a->msz, false, fn);
5651 return true;
cf327449
SL
5652}
5653
/* Indexed by [mte][be][xs][msz]: xs = offset sign-extension,
 * msz = log2 memory element size. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
/* Note that we overload xs=2 to indicate 64-bit offset. */
/* Indexed by [mte][be][xs][msz], as for scatter_store_fn32. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
3a7be554 5746static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 5747{
f6dbf62a 5748 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
5749 bool be = s->be_data == MO_BE;
5750 bool mte = s->mte_active[0];
f6dbf62a
RH
5751
5752 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5753 return false;
5754 }
5755 if (!sve_access_check(s)) {
5756 return true;
5757 }
5758 switch (a->esz) {
5759 case MO_32:
d28d12f0 5760 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
5761 break;
5762 case MO_64:
d28d12f0 5763 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
5764 break;
5765 default:
5766 g_assert_not_reached();
5767 }
5768 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5769 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
5770 return true;
5771}
dec6cf6b 5772
3a7be554 5773static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
5774{
5775 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5776 bool be = s->be_data == MO_BE;
5777 bool mte = s->mte_active[0];
408ecde9
RH
5778
5779 if (a->esz < a->msz) {
5780 return false;
5781 }
5782 if (!sve_access_check(s)) {
5783 return true;
5784 }
5785
5786 switch (a->esz) {
5787 case MO_32:
d28d12f0 5788 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
5789 break;
5790 case MO_64:
d28d12f0 5791 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
5792 break;
5793 }
5794 assert(fn != NULL);
5795
5796 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5797 * by loading the immediate into the scalar parameter.
5798 */
2ccdf94f
RH
5799 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5800 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
5801 return true;
5802}
5803
6ebca45f
SL
5804static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
5805{
b17ab470
RH
5806 gen_helper_gvec_mem_scatter *fn;
5807 bool be = s->be_data == MO_BE;
5808 bool mte = s->mte_active[0];
5809
5810 if (a->esz < a->msz) {
5811 return false;
5812 }
6ebca45f
SL
5813 if (!dc_isar_feature(aa64_sve2, s)) {
5814 return false;
5815 }
b17ab470
RH
5816 if (!sve_access_check(s)) {
5817 return true;
5818 }
5819
5820 switch (a->esz) {
5821 case MO_32:
5822 fn = scatter_store_fn32[mte][be][0][a->msz];
5823 break;
5824 case MO_64:
5825 fn = scatter_store_fn64[mte][be][2][a->msz];
5826 break;
5827 default:
5828 g_assert_not_reached();
5829 }
5830
5831 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5832 cpu_reg(s, a->rm), a->msz, true, fn);
5833 return true;
6ebca45f
SL
5834}
5835
dec6cf6b
RH
5836/*
5837 * Prefetches
5838 */
5839
3a7be554 5840static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
5841{
5842 /* Prefetch is a nop within QEMU. */
2f95a3b0 5843 (void)sve_access_check(s);
dec6cf6b
RH
5844 return true;
5845}
5846
3a7be554 5847static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
5848{
5849 if (a->rm == 31) {
5850 return false;
5851 }
5852 /* Prefetch is a nop within QEMU. */
2f95a3b0 5853 (void)sve_access_check(s);
dec6cf6b
RH
5854 return true;
5855}
a2103582
RH
5856
5857/*
5858 * Move Prefix
5859 *
5860 * TODO: The implementation so far could handle predicated merging movprfx.
5861 * The helper functions as written take an extra source register to
5862 * use in the operation, but the result is only written when predication
5863 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5864 * to allow the final write back to the destination to be unconditional.
5865 * For predicated zeroing movprfx, we need to rearrange the helpers to
5866 * allow the final write back to zero inactives.
5867 *
5868 * In the meantime, just emit the moves.
5869 */
5870
4b0b37e9
RH
5871TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
5872TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
5873TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
5dad1ba5
RH
5874
5875/*
5876 * SVE2 Integer Multiply - Unpredicated
5877 */
5878
b262215b 5879TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
5dad1ba5 5880
bd394cf5
RH
5881static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
5882 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
5883 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
5884};
5885TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5886 smulh_zzz_fns[a->esz], a, 0)
5dad1ba5 5887
bd394cf5
RH
5888static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
5889 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
5890 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
5891};
5892TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5893 umulh_zzz_fns[a->esz], a, 0)
5dad1ba5 5894
bd394cf5
RH
5895TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5896 gen_helper_gvec_pmul_b, a, 0)
5dad1ba5 5897
bd394cf5
RH
5898static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
5899 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
5900 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
5901};
5902TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5903 sqdmulh_zzz_fns[a->esz], a, 0)
169d7c58 5904
bd394cf5
RH
5905static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
5906 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
5907 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
5908};
5909TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5910 sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 5911
d4b1e59d
RH
5912/*
5913 * SVE2 Integer - Predicated
5914 */
5915
5880bdc0
RH
5916static gen_helper_gvec_4 * const sadlp_fns[4] = {
5917 NULL, gen_helper_sve2_sadalp_zpzz_h,
5918 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
5919};
5920TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5921 sadlp_fns[a->esz], a, 0)
d4b1e59d 5922
5880bdc0
RH
5923static gen_helper_gvec_4 * const uadlp_fns[4] = {
5924 NULL, gen_helper_sve2_uadalp_zpzz_h,
5925 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
5926};
5927TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5928 uadlp_fns[a->esz], a, 0)
db366da8
RH
5929
5930/*
5931 * SVE2 integer unary operations (predicated)
5932 */
5933
b2c00961
RH
5934TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
5935 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)
db366da8 5936
b2c00961
RH
5937TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
5938 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
db366da8 5939
b2c00961
RH
5940static gen_helper_gvec_3 * const sqabs_fns[4] = {
5941 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
5942 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
5943};
5944TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)
db366da8 5945
b2c00961
RH
5946static gen_helper_gvec_3 * const sqneg_fns[4] = {
5947 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
5948 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
5949};
5950TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
45d9503d 5951
5880bdc0
RH
5952DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
5953DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
5954DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)
45d9503d 5955
5880bdc0
RH
5956DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
5957DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
5958DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)
a47dc220 5959
5880bdc0
RH
5960DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
5961DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
5962DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)
a47dc220 5963
5880bdc0
RH
5964DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
5965DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
5966DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)
8597dc8b 5967
5880bdc0
RH
5968DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
5969DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
5970DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
5971DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
5972DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)
4f07fbeb 5973
5880bdc0
RH
5974DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
5975DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
5976DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
5977DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
5978DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
5979DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
0ce1dda8
RH
5980
5981/*
5982 * SVE2 Widening Integer Arithmetic
5983 */
5984
615f19fe
RH
5985static gen_helper_gvec_3 * const saddl_fns[4] = {
5986 NULL, gen_helper_sve2_saddl_h,
5987 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
5988};
5989TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5990 saddl_fns[a->esz], a, 0)
5991TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5992 saddl_fns[a->esz], a, 3)
5993TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
5994 saddl_fns[a->esz], a, 2)
5995
5996static gen_helper_gvec_3 * const ssubl_fns[4] = {
5997 NULL, gen_helper_sve2_ssubl_h,
5998 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
5999};
6000TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6001 ssubl_fns[a->esz], a, 0)
6002TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6003 ssubl_fns[a->esz], a, 3)
6004TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
6005 ssubl_fns[a->esz], a, 2)
6006TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
6007 ssubl_fns[a->esz], a, 1)
6008
6009static gen_helper_gvec_3 * const sabdl_fns[4] = {
6010 NULL, gen_helper_sve2_sabdl_h,
6011 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
6012};
6013TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6014 sabdl_fns[a->esz], a, 0)
6015TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6016 sabdl_fns[a->esz], a, 3)
6017
6018static gen_helper_gvec_3 * const uaddl_fns[4] = {
6019 NULL, gen_helper_sve2_uaddl_h,
6020 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
6021};
6022TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6023 uaddl_fns[a->esz], a, 0)
6024TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6025 uaddl_fns[a->esz], a, 3)
6026
6027static gen_helper_gvec_3 * const usubl_fns[4] = {
6028 NULL, gen_helper_sve2_usubl_h,
6029 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
6030};
6031TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6032 usubl_fns[a->esz], a, 0)
6033TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6034 usubl_fns[a->esz], a, 3)
6035
6036static gen_helper_gvec_3 * const uabdl_fns[4] = {
6037 NULL, gen_helper_sve2_uabdl_h,
6038 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
6039};
6040TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6041 uabdl_fns[a->esz], a, 0)
6042TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6043 uabdl_fns[a->esz], a, 3)
6044
6045static gen_helper_gvec_3 * const sqdmull_fns[4] = {
6046 NULL, gen_helper_sve2_sqdmull_zzz_h,
6047 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
6048};
6049TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6050 sqdmull_fns[a->esz], a, 0)
6051TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6052 sqdmull_fns[a->esz], a, 3)
6053
6054static gen_helper_gvec_3 * const smull_fns[4] = {
6055 NULL, gen_helper_sve2_smull_zzz_h,
6056 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
6057};
6058TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6059 smull_fns[a->esz], a, 0)
6060TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6061 smull_fns[a->esz], a, 3)
6062
6063static gen_helper_gvec_3 * const umull_fns[4] = {
6064 NULL, gen_helper_sve2_umull_zzz_h,
6065 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
6066};
6067TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6068 umull_fns[a->esz], a, 0)
6069TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6070 umull_fns[a->esz], a, 3)
6071
6072static gen_helper_gvec_3 * const eoril_fns[4] = {
6073 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6074 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6075};
6076TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
6077TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
2df3ca55 6078
e3a56131
RH
6079static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6080{
6081 static gen_helper_gvec_3 * const fns[4] = {
6082 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6083 NULL, gen_helper_sve2_pmull_d,
6084 };
6085 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
6086 return false;
6087 }
615f19fe 6088 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
e3a56131
RH
6089}
6090
615f19fe
RH
6091TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
6092TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
e3a56131 6093
615f19fe
RH
6094static gen_helper_gvec_3 * const saddw_fns[4] = {
6095 NULL, gen_helper_sve2_saddw_h,
6096 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
6097};
6098TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
6099TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
e3a56131 6100
615f19fe
RH
6101static gen_helper_gvec_3 * const ssubw_fns[4] = {
6102 NULL, gen_helper_sve2_ssubw_h,
6103 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
6104};
6105TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
6106TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
81fccf09 6107
615f19fe
RH
6108static gen_helper_gvec_3 * const uaddw_fns[4] = {
6109 NULL, gen_helper_sve2_uaddw_h,
6110 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
6111};
6112TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
6113TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
81fccf09 6114
615f19fe
RH
6115static gen_helper_gvec_3 * const usubw_fns[4] = {
6116 NULL, gen_helper_sve2_usubw_h,
6117 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
6118};
6119TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
6120TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
4269fef1
RH
6121
6122static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6123{
6124 int top = imm & 1;
6125 int shl = imm >> 1;
6126 int halfbits = 4 << vece;
6127
6128 if (top) {
6129 if (shl == halfbits) {
6130 TCGv_vec t = tcg_temp_new_vec_matching(d);
6131 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6132 tcg_gen_and_vec(vece, d, n, t);
6133 tcg_temp_free_vec(t);
6134 } else {
6135 tcg_gen_sari_vec(vece, d, n, halfbits);
6136 tcg_gen_shli_vec(vece, d, d, shl);
6137 }
6138 } else {
6139 tcg_gen_shli_vec(vece, d, n, halfbits);
6140 tcg_gen_sari_vec(vece, d, d, halfbits - shl);
6141 }
6142}
6143
6144static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
6145{
6146 int halfbits = 4 << vece;
6147 int top = imm & 1;
6148 int shl = (imm >> 1);
6149 int shift;
6150 uint64_t mask;
6151
6152 mask = MAKE_64BIT_MASK(0, halfbits);
6153 mask <<= shl;
6154 mask = dup_const(vece, mask);
6155
6156 shift = shl - top * halfbits;
6157 if (shift < 0) {
6158 tcg_gen_shri_i64(d, n, -shift);
6159 } else {
6160 tcg_gen_shli_i64(d, n, shift);
6161 }
6162 tcg_gen_andi_i64(d, d, mask);
6163}
6164
6165static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6166{
6167 gen_ushll_i64(MO_16, d, n, imm);
6168}
6169
6170static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6171{
6172 gen_ushll_i64(MO_32, d, n, imm);
6173}
6174
6175static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6176{
6177 gen_ushll_i64(MO_64, d, n, imm);
6178}
6179
6180static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6181{
6182 int halfbits = 4 << vece;
6183 int top = imm & 1;
6184 int shl = imm >> 1;
6185
6186 if (top) {
6187 if (shl == halfbits) {
6188 TCGv_vec t = tcg_temp_new_vec_matching(d);
6189 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6190 tcg_gen_and_vec(vece, d, n, t);
6191 tcg_temp_free_vec(t);
6192 } else {
6193 tcg_gen_shri_vec(vece, d, n, halfbits);
6194 tcg_gen_shli_vec(vece, d, d, shl);
6195 }
6196 } else {
6197 if (shl == 0) {
6198 TCGv_vec t = tcg_temp_new_vec_matching(d);
6199 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6200 tcg_gen_and_vec(vece, d, n, t);
6201 tcg_temp_free_vec(t);
6202 } else {
6203 tcg_gen_shli_vec(vece, d, n, halfbits);
6204 tcg_gen_shri_vec(vece, d, d, halfbits - shl);
6205 }
6206 }
6207}
6208
6209static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
6210 bool sel, bool uns)
6211{
6212 static const TCGOpcode sshll_list[] = {
6213 INDEX_op_shli_vec, INDEX_op_sari_vec, 0
6214 };
6215 static const TCGOpcode ushll_list[] = {
6216 INDEX_op_shli_vec, INDEX_op_shri_vec, 0
6217 };
6218 static const GVecGen2i ops[2][3] = {
6219 { { .fniv = gen_sshll_vec,
6220 .opt_opc = sshll_list,
6221 .fno = gen_helper_sve2_sshll_h,
6222 .vece = MO_16 },
6223 { .fniv = gen_sshll_vec,
6224 .opt_opc = sshll_list,
6225 .fno = gen_helper_sve2_sshll_s,
6226 .vece = MO_32 },
6227 { .fniv = gen_sshll_vec,
6228 .opt_opc = sshll_list,
6229 .fno = gen_helper_sve2_sshll_d,
6230 .vece = MO_64 } },
6231 { { .fni8 = gen_ushll16_i64,
6232 .fniv = gen_ushll_vec,
6233 .opt_opc = ushll_list,
6234 .fno = gen_helper_sve2_ushll_h,
6235 .vece = MO_16 },
6236 { .fni8 = gen_ushll32_i64,
6237 .fniv = gen_ushll_vec,
6238 .opt_opc = ushll_list,
6239 .fno = gen_helper_sve2_ushll_s,
6240 .vece = MO_32 },
6241 { .fni8 = gen_ushll64_i64,
6242 .fniv = gen_ushll_vec,
6243 .opt_opc = ushll_list,
6244 .fno = gen_helper_sve2_ushll_d,
6245 .vece = MO_64 } },
6246 };
6247
6248 if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
6249 return false;
6250 }
6251 if (sve_access_check(s)) {
6252 unsigned vsz = vec_full_reg_size(s);
6253 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6254 vec_full_reg_offset(s, a->rn),
6255 vsz, vsz, (a->imm << 1) | sel,
6256 &ops[uns][a->esz]);
6257 }
6258 return true;
6259}
6260
6261static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
6262{
6263 return do_sve2_shll_tb(s, a, false, false);
6264}
6265
6266static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
6267{
6268 return do_sve2_shll_tb(s, a, true, false);
6269}
6270
6271static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
6272{
6273 return do_sve2_shll_tb(s, a, false, true);
6274}
6275
6276static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
6277{
6278 return do_sve2_shll_tb(s, a, true, true);
6279}
cb9c33b8 6280
615f19fe
RH
6281static gen_helper_gvec_3 * const bext_fns[4] = {
6282 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
6283 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
6284};
6285TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6286 bext_fns[a->esz], a, 0)
ed4a6387 6287
615f19fe
RH
6288static gen_helper_gvec_3 * const bdep_fns[4] = {
6289 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
6290 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
6291};
6292TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6293 bdep_fns[a->esz], a, 0)
ed4a6387 6294
615f19fe
RH
6295static gen_helper_gvec_3 * const bgrp_fns[4] = {
6296 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
6297 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
6298};
6299TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6300 bgrp_fns[a->esz], a, 0)
ed4a6387 6301
615f19fe
RH
6302static gen_helper_gvec_3 * const cadd_fns[4] = {
6303 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
6304 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
6305};
6306TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6307 cadd_fns[a->esz], a, 0)
6308TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6309 cadd_fns[a->esz], a, 1)
6310
6311static gen_helper_gvec_3 * const sqcadd_fns[4] = {
6312 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
6313 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
6314};
6315TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6316 sqcadd_fns[a->esz], a, 0)
6317TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6318 sqcadd_fns[a->esz], a, 1)
38650638 6319
eeb4e84d
RH
6320static gen_helper_gvec_4 * const sabal_fns[4] = {
6321 NULL, gen_helper_sve2_sabal_h,
6322 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
6323};
6324TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
6325TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
38650638 6326
eeb4e84d
RH
6327static gen_helper_gvec_4 * const uabal_fns[4] = {
6328 NULL, gen_helper_sve2_uabal_h,
6329 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
6330};
6331TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
6332TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
b8295dfb
RH
6333
6334static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
6335{
6336 static gen_helper_gvec_4 * const fns[2] = {
6337 gen_helper_sve2_adcl_s,
6338 gen_helper_sve2_adcl_d,
6339 };
6340 /*
6341 * Note that in this case the ESZ field encodes both size and sign.
6342 * Split out 'subtract' into bit 1 of the data field for the helper.
6343 */
eeb4e84d 6344 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
b8295dfb
RH
6345}
6346
eeb4e84d
RH
6347TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
6348TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e 6349
f2be26a5
RH
6350TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
6351TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
6352TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
6353TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
6354TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
6355TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)
289a1797 6356
79828dcb
RH
6357TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
6358TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
5ff2838d
RH
6359
6360static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
6361 const GVecGen2 ops[3])
6362{
6363 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
6364 !dc_isar_feature(aa64_sve2, s)) {
6365 return false;
6366 }
6367 if (sve_access_check(s)) {
6368 unsigned vsz = vec_full_reg_size(s);
6369 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
6370 vec_full_reg_offset(s, a->rn),
6371 vsz, vsz, &ops[a->esz]);
6372 }
6373 return true;
6374}
6375
6376static const TCGOpcode sqxtn_list[] = {
6377 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
6378};
6379
6380static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6381{
6382 TCGv_vec t = tcg_temp_new_vec_matching(d);
6383 int halfbits = 4 << vece;
6384 int64_t mask = (1ull << halfbits) - 1;
6385 int64_t min = -1ull << (halfbits - 1);
6386 int64_t max = -min - 1;
6387
6388 tcg_gen_dupi_vec(vece, t, min);
6389 tcg_gen_smax_vec(vece, d, n, t);
6390 tcg_gen_dupi_vec(vece, t, max);
6391 tcg_gen_smin_vec(vece, d, d, t);
6392 tcg_gen_dupi_vec(vece, t, mask);
6393 tcg_gen_and_vec(vece, d, d, t);
6394 tcg_temp_free_vec(t);
6395}
6396
6397static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
6398{
6399 static const GVecGen2 ops[3] = {
6400 { .fniv = gen_sqxtnb_vec,
6401 .opt_opc = sqxtn_list,
6402 .fno = gen_helper_sve2_sqxtnb_h,
6403 .vece = MO_16 },
6404 { .fniv = gen_sqxtnb_vec,
6405 .opt_opc = sqxtn_list,
6406 .fno = gen_helper_sve2_sqxtnb_s,
6407 .vece = MO_32 },
6408 { .fniv = gen_sqxtnb_vec,
6409 .opt_opc = sqxtn_list,
6410 .fno = gen_helper_sve2_sqxtnb_d,
6411 .vece = MO_64 },
6412 };
6413 return do_sve2_narrow_extract(s, a, ops);
6414}
6415
6416static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6417{
6418 TCGv_vec t = tcg_temp_new_vec_matching(d);
6419 int halfbits = 4 << vece;
6420 int64_t mask = (1ull << halfbits) - 1;
6421 int64_t min = -1ull << (halfbits - 1);
6422 int64_t max = -min - 1;
6423
6424 tcg_gen_dupi_vec(vece, t, min);
6425 tcg_gen_smax_vec(vece, n, n, t);
6426 tcg_gen_dupi_vec(vece, t, max);
6427 tcg_gen_smin_vec(vece, n, n, t);
6428 tcg_gen_shli_vec(vece, n, n, halfbits);
6429 tcg_gen_dupi_vec(vece, t, mask);
6430 tcg_gen_bitsel_vec(vece, d, t, d, n);
6431 tcg_temp_free_vec(t);
6432}
6433
6434static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
6435{
6436 static const GVecGen2 ops[3] = {
6437 { .fniv = gen_sqxtnt_vec,
6438 .opt_opc = sqxtn_list,
6439 .load_dest = true,
6440 .fno = gen_helper_sve2_sqxtnt_h,
6441 .vece = MO_16 },
6442 { .fniv = gen_sqxtnt_vec,
6443 .opt_opc = sqxtn_list,
6444 .load_dest = true,
6445 .fno = gen_helper_sve2_sqxtnt_s,
6446 .vece = MO_32 },
6447 { .fniv = gen_sqxtnt_vec,
6448 .opt_opc = sqxtn_list,
6449 .load_dest = true,
6450 .fno = gen_helper_sve2_sqxtnt_d,
6451 .vece = MO_64 },
6452 };
6453 return do_sve2_narrow_extract(s, a, ops);
6454}
6455
6456static const TCGOpcode uqxtn_list[] = {
6457 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
6458};
6459
6460static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6461{
6462 TCGv_vec t = tcg_temp_new_vec_matching(d);
6463 int halfbits = 4 << vece;
6464 int64_t max = (1ull << halfbits) - 1;
6465
6466 tcg_gen_dupi_vec(vece, t, max);
6467 tcg_gen_umin_vec(vece, d, n, t);
6468 tcg_temp_free_vec(t);
6469}
6470
6471static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
6472{
6473 static const GVecGen2 ops[3] = {
6474 { .fniv = gen_uqxtnb_vec,
6475 .opt_opc = uqxtn_list,
6476 .fno = gen_helper_sve2_uqxtnb_h,
6477 .vece = MO_16 },
6478 { .fniv = gen_uqxtnb_vec,
6479 .opt_opc = uqxtn_list,
6480 .fno = gen_helper_sve2_uqxtnb_s,
6481 .vece = MO_32 },
6482 { .fniv = gen_uqxtnb_vec,
6483 .opt_opc = uqxtn_list,
6484 .fno = gen_helper_sve2_uqxtnb_d,
6485 .vece = MO_64 },
6486 };
6487 return do_sve2_narrow_extract(s, a, ops);
6488}
6489
6490static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6491{
6492 TCGv_vec t = tcg_temp_new_vec_matching(d);
6493 int halfbits = 4 << vece;
6494 int64_t max = (1ull << halfbits) - 1;
6495
6496 tcg_gen_dupi_vec(vece, t, max);
6497 tcg_gen_umin_vec(vece, n, n, t);
6498 tcg_gen_shli_vec(vece, n, n, halfbits);
6499 tcg_gen_bitsel_vec(vece, d, t, d, n);
6500 tcg_temp_free_vec(t);
6501}
6502
6503static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
6504{
6505 static const GVecGen2 ops[3] = {
6506 { .fniv = gen_uqxtnt_vec,
6507 .opt_opc = uqxtn_list,
6508 .load_dest = true,
6509 .fno = gen_helper_sve2_uqxtnt_h,
6510 .vece = MO_16 },
6511 { .fniv = gen_uqxtnt_vec,
6512 .opt_opc = uqxtn_list,
6513 .load_dest = true,
6514 .fno = gen_helper_sve2_uqxtnt_s,
6515 .vece = MO_32 },
6516 { .fniv = gen_uqxtnt_vec,
6517 .opt_opc = uqxtn_list,
6518 .load_dest = true,
6519 .fno = gen_helper_sve2_uqxtnt_d,
6520 .vece = MO_64 },
6521 };
6522 return do_sve2_narrow_extract(s, a, ops);
6523}
6524
6525static const TCGOpcode sqxtun_list[] = {
6526 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
6527};
6528
6529static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6530{
6531 TCGv_vec t = tcg_temp_new_vec_matching(d);
6532 int halfbits = 4 << vece;
6533 int64_t max = (1ull << halfbits) - 1;
6534
6535 tcg_gen_dupi_vec(vece, t, 0);
6536 tcg_gen_smax_vec(vece, d, n, t);
6537 tcg_gen_dupi_vec(vece, t, max);
6538 tcg_gen_umin_vec(vece, d, d, t);
6539 tcg_temp_free_vec(t);
6540}
6541
6542static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
6543{
6544 static const GVecGen2 ops[3] = {
6545 { .fniv = gen_sqxtunb_vec,
6546 .opt_opc = sqxtun_list,
6547 .fno = gen_helper_sve2_sqxtunb_h,
6548 .vece = MO_16 },
6549 { .fniv = gen_sqxtunb_vec,
6550 .opt_opc = sqxtun_list,
6551 .fno = gen_helper_sve2_sqxtunb_s,
6552 .vece = MO_32 },
6553 { .fniv = gen_sqxtunb_vec,
6554 .opt_opc = sqxtun_list,
6555 .fno = gen_helper_sve2_sqxtunb_d,
6556 .vece = MO_64 },
6557 };
6558 return do_sve2_narrow_extract(s, a, ops);
6559}
6560
6561static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6562{
6563 TCGv_vec t = tcg_temp_new_vec_matching(d);
6564 int halfbits = 4 << vece;
6565 int64_t max = (1ull << halfbits) - 1;
6566
6567 tcg_gen_dupi_vec(vece, t, 0);
6568 tcg_gen_smax_vec(vece, n, n, t);
6569 tcg_gen_dupi_vec(vece, t, max);
6570 tcg_gen_umin_vec(vece, n, n, t);
6571 tcg_gen_shli_vec(vece, n, n, halfbits);
6572 tcg_gen_bitsel_vec(vece, d, t, d, n);
6573 tcg_temp_free_vec(t);
6574}
6575
6576static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
6577{
6578 static const GVecGen2 ops[3] = {
6579 { .fniv = gen_sqxtunt_vec,
6580 .opt_opc = sqxtun_list,
6581 .load_dest = true,
6582 .fno = gen_helper_sve2_sqxtunt_h,
6583 .vece = MO_16 },
6584 { .fniv = gen_sqxtunt_vec,
6585 .opt_opc = sqxtun_list,
6586 .load_dest = true,
6587 .fno = gen_helper_sve2_sqxtunt_s,
6588 .vece = MO_32 },
6589 { .fniv = gen_sqxtunt_vec,
6590 .opt_opc = sqxtun_list,
6591 .load_dest = true,
6592 .fno = gen_helper_sve2_sqxtunt_d,
6593 .vece = MO_64 },
6594 };
6595 return do_sve2_narrow_extract(s, a, ops);
46d111b2
RH
6596}
6597
6598static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
6599 const GVecGen2i ops[3])
6600{
6601 if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
6602 return false;
6603 }
6604 assert(a->imm > 0 && a->imm <= (8 << a->esz));
6605 if (sve_access_check(s)) {
6606 unsigned vsz = vec_full_reg_size(s);
6607 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6608 vec_full_reg_offset(s, a->rn),
6609 vsz, vsz, a->imm, &ops[a->esz]);
6610 }
6611 return true;
6612}
6613
6614static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6615{
6616 int halfbits = 4 << vece;
6617 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6618
6619 tcg_gen_shri_i64(d, n, shr);
6620 tcg_gen_andi_i64(d, d, mask);
6621}
6622
6623static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6624{
6625 gen_shrnb_i64(MO_16, d, n, shr);
6626}
6627
6628static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6629{
6630 gen_shrnb_i64(MO_32, d, n, shr);
6631}
6632
6633static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6634{
6635 gen_shrnb_i64(MO_64, d, n, shr);
6636}
6637
6638static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
6639{
6640 TCGv_vec t = tcg_temp_new_vec_matching(d);
6641 int halfbits = 4 << vece;
6642 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
6643
6644 tcg_gen_shri_vec(vece, n, n, shr);
6645 tcg_gen_dupi_vec(vece, t, mask);
6646 tcg_gen_and_vec(vece, d, n, t);
6647 tcg_temp_free_vec(t);
6648}
6649
6650static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
6651{
6652 static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
6653 static const GVecGen2i ops[3] = {
6654 { .fni8 = gen_shrnb16_i64,
6655 .fniv = gen_shrnb_vec,
6656 .opt_opc = vec_list,
6657 .fno = gen_helper_sve2_shrnb_h,
6658 .vece = MO_16 },
6659 { .fni8 = gen_shrnb32_i64,
6660 .fniv = gen_shrnb_vec,
6661 .opt_opc = vec_list,
6662 .fno = gen_helper_sve2_shrnb_s,
6663 .vece = MO_32 },
6664 { .fni8 = gen_shrnb64_i64,
6665 .fniv = gen_shrnb_vec,
6666 .opt_opc = vec_list,
6667 .fno = gen_helper_sve2_shrnb_d,
6668 .vece = MO_64 },
6669 };
6670 return do_sve2_shr_narrow(s, a, ops);
6671}
6672
6673static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6674{
6675 int halfbits = 4 << vece;
6676 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6677
6678 tcg_gen_shli_i64(n, n, halfbits - shr);
6679 tcg_gen_andi_i64(n, n, ~mask);
6680 tcg_gen_andi_i64(d, d, mask);
6681 tcg_gen_or_i64(d, d, n);
6682}
6683
6684static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6685{
6686 gen_shrnt_i64(MO_16, d, n, shr);
6687}
6688
6689static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6690{
6691 gen_shrnt_i64(MO_32, d, n, shr);
6692}
6693
6694static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6695{
6696 tcg_gen_shri_i64(n, n, shr);
6697 tcg_gen_deposit_i64(d, d, n, 32, 32);
6698}
6699
6700static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
6701{
6702 TCGv_vec t = tcg_temp_new_vec_matching(d);
6703 int halfbits = 4 << vece;
6704 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
6705
6706 tcg_gen_shli_vec(vece, n, n, halfbits - shr);
6707 tcg_gen_dupi_vec(vece, t, mask);
6708 tcg_gen_bitsel_vec(vece, d, t, d, n);
6709 tcg_temp_free_vec(t);
6710}
6711
6712static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
6713{
6714 static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
6715 static const GVecGen2i ops[3] = {
6716 { .fni8 = gen_shrnt16_i64,
6717 .fniv = gen_shrnt_vec,
6718 .opt_opc = vec_list,
6719 .load_dest = true,
6720 .fno = gen_helper_sve2_shrnt_h,
6721 .vece = MO_16 },
6722 { .fni8 = gen_shrnt32_i64,
6723 .fniv = gen_shrnt_vec,
6724 .opt_opc = vec_list,
6725 .load_dest = true,
6726 .fno = gen_helper_sve2_shrnt_s,
6727 .vece = MO_32 },
6728 { .fni8 = gen_shrnt64_i64,
6729 .fniv = gen_shrnt_vec,
6730 .opt_opc = vec_list,
6731 .load_dest = true,
6732 .fno = gen_helper_sve2_shrnt_d,
6733 .vece = MO_64 },
6734 };
6735 return do_sve2_shr_narrow(s, a, ops);
6736}
6737
6738static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
6739{
6740 static const GVecGen2i ops[3] = {
6741 { .fno = gen_helper_sve2_rshrnb_h },
6742 { .fno = gen_helper_sve2_rshrnb_s },
6743 { .fno = gen_helper_sve2_rshrnb_d },
6744 };
6745 return do_sve2_shr_narrow(s, a, ops);
6746}
6747
6748static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
6749{
6750 static const GVecGen2i ops[3] = {
6751 { .fno = gen_helper_sve2_rshrnt_h },
6752 { .fno = gen_helper_sve2_rshrnt_s },
6753 { .fno = gen_helper_sve2_rshrnt_d },
6754 };
6755 return do_sve2_shr_narrow(s, a, ops);
81fd3e6e
RH
6756}
6757
6758static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
6759 TCGv_vec n, int64_t shr)
6760{
6761 TCGv_vec t = tcg_temp_new_vec_matching(d);
6762 int halfbits = 4 << vece;
6763
6764 tcg_gen_sari_vec(vece, n, n, shr);
6765 tcg_gen_dupi_vec(vece, t, 0);
6766 tcg_gen_smax_vec(vece, n, n, t);
6767 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6768 tcg_gen_umin_vec(vece, d, n, t);
6769 tcg_temp_free_vec(t);
6770}
6771
6772static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
6773{
6774 static const TCGOpcode vec_list[] = {
6775 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6776 };
6777 static const GVecGen2i ops[3] = {
6778 { .fniv = gen_sqshrunb_vec,
6779 .opt_opc = vec_list,
6780 .fno = gen_helper_sve2_sqshrunb_h,
6781 .vece = MO_16 },
6782 { .fniv = gen_sqshrunb_vec,
6783 .opt_opc = vec_list,
6784 .fno = gen_helper_sve2_sqshrunb_s,
6785 .vece = MO_32 },
6786 { .fniv = gen_sqshrunb_vec,
6787 .opt_opc = vec_list,
6788 .fno = gen_helper_sve2_sqshrunb_d,
6789 .vece = MO_64 },
6790 };
6791 return do_sve2_shr_narrow(s, a, ops);
6792}
6793
/*
 * Inline expansion for SQSHRUNT: as gen_sqshrunb_vec, but the saturated
 * narrow value is shifted into the high half of each wide element and
 * merged with the existing low half of d: the bitsel keeps d where the
 * low-half mask t is set and takes n elsewhere.  NB: n is clobbered.
 */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;       /* bits in one narrow element */

    tcg_gen_sari_vec(vece, n, n, shr);
    /* Unsigned saturation to [0, 2^halfbits - 1]. */
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    /* Move the result into the top half and merge with d's bottom half. */
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
6809
6810static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
6811{
6812 static const TCGOpcode vec_list[] = {
6813 INDEX_op_shli_vec, INDEX_op_sari_vec,
6814 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6815 };
6816 static const GVecGen2i ops[3] = {
6817 { .fniv = gen_sqshrunt_vec,
6818 .opt_opc = vec_list,
6819 .load_dest = true,
6820 .fno = gen_helper_sve2_sqshrunt_h,
6821 .vece = MO_16 },
6822 { .fniv = gen_sqshrunt_vec,
6823 .opt_opc = vec_list,
6824 .load_dest = true,
6825 .fno = gen_helper_sve2_sqshrunt_s,
6826 .vece = MO_32 },
6827 { .fniv = gen_sqshrunt_vec,
6828 .opt_opc = vec_list,
6829 .load_dest = true,
6830 .fno = gen_helper_sve2_sqshrunt_d,
6831 .vece = MO_64 },
6832 };
6833 return do_sve2_shr_narrow(s, a, ops);
6834}
6835
6836static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
6837{
6838 static const GVecGen2i ops[3] = {
6839 { .fno = gen_helper_sve2_sqrshrunb_h },
6840 { .fno = gen_helper_sve2_sqrshrunb_s },
6841 { .fno = gen_helper_sve2_sqrshrunb_d },
6842 };
6843 return do_sve2_shr_narrow(s, a, ops);
6844}
6845
6846static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
6847{
6848 static const GVecGen2i ops[3] = {
6849 { .fno = gen_helper_sve2_sqrshrunt_h },
6850 { .fno = gen_helper_sve2_sqrshrunt_s },
6851 { .fno = gen_helper_sve2_sqrshrunt_d },
6852 };
6853 return do_sve2_shr_narrow(s, a, ops);
c13418da
RH
6854}
6855
743bb147
RH
/*
 * Inline expansion for SQSHRNB: arithmetic shift right, then saturate
 * to the signed half-width range [-2^(halfbits-1), 2^(halfbits-1) - 1].
 * The result is masked into the low half of each wide element of d,
 * high half clear.  NB: n is clobbered.
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;       /* bits in one narrow element */
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    /* Signed saturation: clamp to [min, max]. */
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    /* Keep only the narrow element in the bottom half. */
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
6873
6874static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
6875{
6876 static const TCGOpcode vec_list[] = {
6877 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6878 };
6879 static const GVecGen2i ops[3] = {
6880 { .fniv = gen_sqshrnb_vec,
6881 .opt_opc = vec_list,
6882 .fno = gen_helper_sve2_sqshrnb_h,
6883 .vece = MO_16 },
6884 { .fniv = gen_sqshrnb_vec,
6885 .opt_opc = vec_list,
6886 .fno = gen_helper_sve2_sqshrnb_s,
6887 .vece = MO_32 },
6888 { .fniv = gen_sqshrnb_vec,
6889 .opt_opc = vec_list,
6890 .fno = gen_helper_sve2_sqshrnb_d,
6891 .vece = MO_64 },
6892 };
6893 return do_sve2_shr_narrow(s, a, ops);
6894}
6895
/*
 * Inline expansion for SQSHRNT: as gen_sqshrnb_vec, but the saturated
 * narrow value is shifted into the high half of each wide element and
 * merged with the existing low half of d (bitsel keeps d under the
 * low-half mask t, takes n elsewhere).  NB: n is clobbered.
 */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;       /* bits in one narrow element */
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    /* Signed saturation: clamp to [min, max]. */
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    /* Move the result into the top half and merge with d's bottom half. */
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
6914
6915static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
6916{
6917 static const TCGOpcode vec_list[] = {
6918 INDEX_op_shli_vec, INDEX_op_sari_vec,
6919 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6920 };
6921 static const GVecGen2i ops[3] = {
6922 { .fniv = gen_sqshrnt_vec,
6923 .opt_opc = vec_list,
6924 .load_dest = true,
6925 .fno = gen_helper_sve2_sqshrnt_h,
6926 .vece = MO_16 },
6927 { .fniv = gen_sqshrnt_vec,
6928 .opt_opc = vec_list,
6929 .load_dest = true,
6930 .fno = gen_helper_sve2_sqshrnt_s,
6931 .vece = MO_32 },
6932 { .fniv = gen_sqshrnt_vec,
6933 .opt_opc = vec_list,
6934 .load_dest = true,
6935 .fno = gen_helper_sve2_sqshrnt_d,
6936 .vece = MO_64 },
6937 };
6938 return do_sve2_shr_narrow(s, a, ops);
6939}
6940
6941static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
6942{
6943 static const GVecGen2i ops[3] = {
6944 { .fno = gen_helper_sve2_sqrshrnb_h },
6945 { .fno = gen_helper_sve2_sqrshrnb_s },
6946 { .fno = gen_helper_sve2_sqrshrnb_d },
6947 };
6948 return do_sve2_shr_narrow(s, a, ops);
6949}
6950
6951static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
6952{
6953 static const GVecGen2i ops[3] = {
6954 { .fno = gen_helper_sve2_sqrshrnt_h },
6955 { .fno = gen_helper_sve2_sqrshrnt_s },
6956 { .fno = gen_helper_sve2_sqrshrnt_d },
6957 };
6958 return do_sve2_shr_narrow(s, a, ops);
6959}
6960
c13418da
RH
/*
 * Inline expansion for UQSHRNB: logical shift right of the unsigned
 * wide elements, then saturate to [0, 2^halfbits - 1] via umin.  The
 * narrow result lands in the low half of each wide element of d with
 * the high half clear.  NB: n is clobbered.
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;       /* bits in one narrow element */

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
6972
6973static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
6974{
6975 static const TCGOpcode vec_list[] = {
6976 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
6977 };
6978 static const GVecGen2i ops[3] = {
6979 { .fniv = gen_uqshrnb_vec,
6980 .opt_opc = vec_list,
6981 .fno = gen_helper_sve2_uqshrnb_h,
6982 .vece = MO_16 },
6983 { .fniv = gen_uqshrnb_vec,
6984 .opt_opc = vec_list,
6985 .fno = gen_helper_sve2_uqshrnb_s,
6986 .vece = MO_32 },
6987 { .fniv = gen_uqshrnb_vec,
6988 .opt_opc = vec_list,
6989 .fno = gen_helper_sve2_uqshrnb_d,
6990 .vece = MO_64 },
6991 };
6992 return do_sve2_shr_narrow(s, a, ops);
6993}
6994
/*
 * Inline expansion for UQSHRNT: as gen_uqshrnb_vec, but the saturated
 * narrow value is shifted into the high half of each wide element and
 * merged with the existing low half of d (bitsel keeps d under the
 * low-half mask t, takes n elsewhere).  NB: n is clobbered.
 */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;       /* bits in one narrow element */

    tcg_gen_shri_vec(vece, n, n, shr);
    /* Unsigned saturation to [0, 2^halfbits - 1]. */
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    /* Move the result into the top half and merge with d's bottom half. */
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7008
7009static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
7010{
7011 static const TCGOpcode vec_list[] = {
7012 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7013 };
7014 static const GVecGen2i ops[3] = {
7015 { .fniv = gen_uqshrnt_vec,
7016 .opt_opc = vec_list,
7017 .load_dest = true,
7018 .fno = gen_helper_sve2_uqshrnt_h,
7019 .vece = MO_16 },
7020 { .fniv = gen_uqshrnt_vec,
7021 .opt_opc = vec_list,
7022 .load_dest = true,
7023 .fno = gen_helper_sve2_uqshrnt_s,
7024 .vece = MO_32 },
7025 { .fniv = gen_uqshrnt_vec,
7026 .opt_opc = vec_list,
7027 .load_dest = true,
7028 .fno = gen_helper_sve2_uqshrnt_d,
7029 .vece = MO_64 },
7030 };
7031 return do_sve2_shr_narrow(s, a, ops);
7032}
7033
7034static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
7035{
7036 static const GVecGen2i ops[3] = {
7037 { .fno = gen_helper_sve2_uqrshrnb_h },
7038 { .fno = gen_helper_sve2_uqrshrnb_s },
7039 { .fno = gen_helper_sve2_uqrshrnb_d },
7040 };
7041 return do_sve2_shr_narrow(s, a, ops);
7042}
7043
7044static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
7045{
7046 static const GVecGen2i ops[3] = {
7047 { .fno = gen_helper_sve2_uqrshrnt_h },
7048 { .fno = gen_helper_sve2_uqrshrnt_s },
7049 { .fno = gen_helper_sve2_uqrshrnt_d },
7050 };
7051 return do_sve2_shr_narrow(s, a, ops);
5ff2838d 7052}
b87dbeeb 7053
/*
 * SVE2 integer add/subtract narrowing high half: build the per-esz
 * helper table (no byte form, hence the leading NULL) and expand the
 * corresponding TRANS_FEAT gated on SVE2.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                  \
    static gen_helper_gvec_3 * const name##_fns[4] = {                  \
        NULL,                       gen_helper_sve2_##name##_h,         \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,         \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                   \
               name##_fns[a->esz], a, 0)
40d5ea50
SL
7061
/* Add narrowing high half: plain and rounding (R) forms, bottom/top. */
DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

/* Subtract narrowing high half: plain and rounding (R) forms. */
DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
c3cd6766 7071
ef75309b
RH
/* MATCH/NMATCH: only byte and halfword element sizes are valid. */
static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
e0ae6ec3 7081
5880bdc0
RH
/* HISTCNT: only word and doubleword element sizes are valid. */
static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
           histcnt_fns[a->esz], a, 0)

/* HISTSEG: bytes only; a NULL helper makes TRANS_FEAT reject others. */
TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
           a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7d47ac94 7090
7de2617b
RH
/* SVE2 floating-point pairwise operations (predicated). */
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
bfc9307e
RH
7096
/*
 * SVE Integer Multiply-Add (unpredicated)
 */

/* FMMLA: FP matrix multiply-accumulate, gated on F32MM/F64MM. */
TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
           a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
           a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
4f26756b 7105
eeb4e84d
RH
/*
 * SQDMLAL{B,T,BT}: saturating doubling multiply-add long.  No byte
 * form (leading NULL); the trailing constant tells the helper which
 * bottom/top input halves to combine (0/3/2 for B/T/BT).
 */
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

/* SQDMLSL{B,T,BT}: the subtracting counterpart, same encoding. */
static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

/* SQRDMLAH: saturating rounding doubling multiply-add, all sizes. */
static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)
45a32e80 7134
eeb4e84d
RH
/* SQRDMLSH: saturating rounding doubling multiply-subtract, all sizes. */
static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)

/*
 * Widening multiply-add/subtract long: no byte form (leading NULL);
 * the trailing data argument selects bottom (0) or top (1) halves.
 */
static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)
d782d3ca 7177
5f425b92
RH
/* CMLA: complex integer multiply-add; rot is passed as helper data. */
static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* CDOT: complex dot product; word/doubleword accumulators only. */
static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* SQRDCMLAH: saturating rounding doubling complex multiply-add. */
static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
6a98cb2a 7197
8740d694
RH
/* USDOT: mixed-sign dot product; only the 32-bit (esz==2) form exists. */
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
b2bcd1be 7200
0ea3cdbf
RH
/* SVE2 crypto extensions: AES, SM4, and SHA3 RAX1. */
TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
           gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)

/* AESE and AESD share one helper; the data argument selects decrypt. */
TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, false)
TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, true)

TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4ekey, a, 0)

TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
5c1b7226 7215
0360730c
RH
/* SVE2 FP down-conversions, narrowing (NT = "narrow top" helpers). */
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

/* BFCVTNT: single to bfloat16, gated on the BF16 feature. */
TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

/* SVE2 FP up-conversions, widening (LT = "long top" helpers). */
TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)
95365277 7228
27645836
RH
/* FCVTX/FCVTXNT: double-to-single conversion using round-to-odd. */
TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve2_fcvtnt_ds)
631be02e 7233
7b9dfcfe
RH
/* FLOGB: no byte form; half-precision uses the FP16 status flags. */
static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL, gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
50d102bd
SL
7240
7241static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
7242{
7243 if (!dc_isar_feature(aa64_sve2, s)) {
7244 return false;
7245 }
41bf9b67
RH
7246 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
7247 a->rd, a->rn, a->rm, a->ra,
7248 (sel << 1) | sub, cpu_env);
50d102bd
SL
7249}
7250
/* FMLALB (vectors): multiply-add long from the bottom halves. */
static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, false);
}

/* FMLALT (vectors): multiply-add long from the top halves. */
static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, true);
}

/* FMLSLB (vectors): multiply-subtract long from the bottom halves. */
static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, false);
}

/* FMLSLT (vectors): multiply-subtract long from the top halves. */
static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, true);
}
7270
7271static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
7272{
7273 if (!dc_isar_feature(aa64_sve2, s)) {
7274 return false;
7275 }
41bf9b67
RH
7276 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
7277 a->rd, a->rn, a->rm, a->ra,
7278 (a->index << 2) | (sel << 1) | sub, cpu_env);
50d102bd
SL
7279}
7280
/* FMLALB (indexed): multiply-add long from the bottom halves. */
static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, false);
}

/* FMLALT (indexed): multiply-add long from the top halves. */
static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, true);
}

/* FMLSLB (indexed): multiply-subtract long from the bottom halves. */
static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, false);
}

/* FMLSLT (indexed): multiply-subtract long from the top halves. */
static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, true);
}
2323c5ff 7300
eec05e4e
RH
/* 8-bit integer matrix multiply-accumulate, gated on I8MM. */
TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_ummla_b, a, 0)
cb8657f7 7307
eec05e4e
RH
/* BFloat16 dot product (vector and indexed forms) and matrix multiply. */
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfmmla, a, 0)
5693887f
RH
7315
/*
 * BFMLAL{B,T} (vectors): bfloat16 products accumulated into
 * single-precision; sel selects the top (true) or bottom halves.
 * The BF16 feature check is supplied by the TRANS_FEAT users.
 */
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}
7321
698ddb9d
RH
/* BFMLALB/BFMLALT (vectors), gated on the BF16 feature. */
TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)
458d0ab6
RH
7324
/*
 * BFMLAL{B,T} (indexed): as do_BFMLAL_zzzw, with the element index
 * packed above the top/bottom selector in the helper data word.
 */
static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel, FPST_FPCR);
}
7331
698ddb9d
RH
/* BFMLALB/BFMLALT (indexed), gated on the BF16 feature. */
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)