]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Mark LDFF1 and LDNF1 as non-streaming
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
49/*
50 * Helpers for extracting complex instruction fields.
51 */
52
53/* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
451e4ffd 56static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
57{
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60}
61
451e4ffd 62static int tszimm_shr(DisasContext *s, int x)
ccd841c3 63{
451e4ffd 64 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
65}
66
67/* See e.g. LSL (immediate, predicated). */
451e4ffd 68static int tszimm_shl(DisasContext *s, int x)
ccd841c3 69{
451e4ffd 70 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
71}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
c4e7c493
RH
84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
86 */
451e4ffd 87static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
88{
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
91}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
40e32e5a 103/* Invoke an out-of-line helper on 2 Zregs. */
c5edf07d 104static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
40e32e5a
RH
105 int rd, int rn, int data)
106{
c5edf07d
RH
107 if (fn == NULL) {
108 return false;
109 }
110 if (sve_access_check(s)) {
111 unsigned vsz = vec_full_reg_size(s);
112 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
113 vec_full_reg_offset(s, rn),
114 vsz, vsz, data, fn);
115 }
116 return true;
40e32e5a
RH
117}
118
de58c6b0
RH
119static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
120 int rd, int rn, int data,
121 ARMFPStatusFlavour flavour)
122{
123 if (fn == NULL) {
124 return false;
125 }
126 if (sve_access_check(s)) {
127 unsigned vsz = vec_full_reg_size(s);
128 TCGv_ptr status = fpstatus_ptr(flavour);
129
130 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
131 vec_full_reg_offset(s, rn),
132 status, vsz, vsz, data, fn);
133 tcg_temp_free_ptr(status);
134 }
135 return true;
136}
137
138static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
139 arg_rr_esz *a, int data)
140{
141 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
142 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
143}
144
e645d1a1 145/* Invoke an out-of-line helper on 3 Zregs. */
913a8a00 146static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
e645d1a1
RH
147 int rd, int rn, int rm, int data)
148{
913a8a00
RH
149 if (fn == NULL) {
150 return false;
151 }
152 if (sve_access_check(s)) {
153 unsigned vsz = vec_full_reg_size(s);
154 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
155 vec_full_reg_offset(s, rn),
156 vec_full_reg_offset(s, rm),
157 vsz, vsz, data, fn);
158 }
159 return true;
e645d1a1
RH
160}
161
84a272f5
RH
162static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
163 arg_rrr_esz *a, int data)
164{
165 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
166}
167
532724e4
RH
168/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
169static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
170 int rd, int rn, int rm,
171 int data, ARMFPStatusFlavour flavour)
172{
173 if (fn == NULL) {
174 return false;
175 }
176 if (sve_access_check(s)) {
177 unsigned vsz = vec_full_reg_size(s);
178 TCGv_ptr status = fpstatus_ptr(flavour);
179
180 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
181 vec_full_reg_offset(s, rn),
182 vec_full_reg_offset(s, rm),
183 status, vsz, vsz, data, fn);
184
185 tcg_temp_free_ptr(status);
186 }
187 return true;
188}
189
190static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
191 arg_rrr_esz *a, int data)
192{
193 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
194 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
195}
196
38650638 197/* Invoke an out-of-line helper on 4 Zregs. */
7ad416b1 198static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
38650638
RH
199 int rd, int rn, int rm, int ra, int data)
200{
7ad416b1
RH
201 if (fn == NULL) {
202 return false;
203 }
204 if (sve_access_check(s)) {
205 unsigned vsz = vec_full_reg_size(s);
206 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
207 vec_full_reg_offset(s, rn),
208 vec_full_reg_offset(s, rm),
209 vec_full_reg_offset(s, ra),
210 vsz, vsz, data, fn);
211 }
212 return true;
38650638
RH
213}
214
cab79ac9
RH
215static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
216 arg_rrrr_esz *a, int data)
217{
218 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
219}
220
e82d3536
RH
221static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
222 arg_rrxr_esz *a)
223{
224 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
225}
226
41bf9b67
RH
227/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
228static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
229 int rd, int rn, int rm, int ra,
230 int data, TCGv_ptr ptr)
231{
232 if (fn == NULL) {
233 return false;
234 }
235 if (sve_access_check(s)) {
236 unsigned vsz = vec_full_reg_size(s);
237 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
238 vec_full_reg_offset(s, rn),
239 vec_full_reg_offset(s, rm),
240 vec_full_reg_offset(s, ra),
241 ptr, vsz, vsz, data, fn);
242 }
243 return true;
244}
245
246static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
247 int rd, int rn, int rm, int ra,
248 int data, ARMFPStatusFlavour flavour)
249{
250 TCGv_ptr status = fpstatus_ptr(flavour);
251 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
252 tcg_temp_free_ptr(status);
253 return ret;
254}
255
e14da110
RH
256/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
257static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
258 int rd, int rn, int rm, int ra, int pg,
259 int data, ARMFPStatusFlavour flavour)
260{
261 if (fn == NULL) {
262 return false;
263 }
264 if (sve_access_check(s)) {
265 unsigned vsz = vec_full_reg_size(s);
266 TCGv_ptr status = fpstatus_ptr(flavour);
267
268 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
269 vec_full_reg_offset(s, rn),
270 vec_full_reg_offset(s, rm),
271 vec_full_reg_offset(s, ra),
272 pred_full_reg_offset(s, pg),
273 status, vsz, vsz, data, fn);
274
275 tcg_temp_free_ptr(status);
276 }
277 return true;
278}
279
96a461f7 280/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
8fb27a21 281static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
96a461f7
RH
282 int rd, int rn, int pg, int data)
283{
8fb27a21
RH
284 if (fn == NULL) {
285 return false;
286 }
287 if (sve_access_check(s)) {
288 unsigned vsz = vec_full_reg_size(s);
289 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
290 vec_full_reg_offset(s, rn),
291 pred_full_reg_offset(s, pg),
292 vsz, vsz, data, fn);
293 }
294 return true;
96a461f7
RH
295}
296
b051809a
RH
297static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
298 arg_rpr_esz *a, int data)
299{
300 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
301}
302
afa2529c
RH
303static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
304 arg_rpri_esz *a)
305{
306 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
307}
b051809a 308
0360730c
RH
309static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
310 int rd, int rn, int pg, int data,
311 ARMFPStatusFlavour flavour)
312{
313 if (fn == NULL) {
314 return false;
315 }
316 if (sve_access_check(s)) {
317 unsigned vsz = vec_full_reg_size(s);
318 TCGv_ptr status = fpstatus_ptr(flavour);
319
320 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
321 vec_full_reg_offset(s, rn),
322 pred_full_reg_offset(s, pg),
323 status, vsz, vsz, data, fn);
324 tcg_temp_free_ptr(status);
325 }
326 return true;
327}
328
329static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
330 arg_rpr_esz *a, int data,
331 ARMFPStatusFlavour flavour)
332{
333 return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
334}
335
36cbb7a8 336/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
2a753d1e 337static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
36cbb7a8
RH
338 int rd, int rn, int rm, int pg, int data)
339{
2a753d1e
RH
340 if (fn == NULL) {
341 return false;
342 }
343 if (sve_access_check(s)) {
344 unsigned vsz = vec_full_reg_size(s);
345 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
346 vec_full_reg_offset(s, rn),
347 vec_full_reg_offset(s, rm),
348 pred_full_reg_offset(s, pg),
349 vsz, vsz, data, fn);
350 }
351 return true;
36cbb7a8 352}
f7d79c41 353
312016c9
RH
354static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
355 arg_rprr_esz *a, int data)
356{
357 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
358}
359
7e2d07ff
RH
360/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
361static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
362 int rd, int rn, int rm, int pg, int data,
363 ARMFPStatusFlavour flavour)
364{
365 if (fn == NULL) {
366 return false;
367 }
368 if (sve_access_check(s)) {
369 unsigned vsz = vec_full_reg_size(s);
370 TCGv_ptr status = fpstatus_ptr(flavour);
371
372 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
373 vec_full_reg_offset(s, rn),
374 vec_full_reg_offset(s, rm),
375 pred_full_reg_offset(s, pg),
376 status, vsz, vsz, data, fn);
377 tcg_temp_free_ptr(status);
378 }
379 return true;
380}
381
382static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
383 arg_rprr_esz *a)
384{
385 return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
386 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
387}
388
faf915e2
RH
389/* Invoke a vector expander on two Zregs and an immediate. */
390static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
391 int esz, int rd, int rn, uint64_t imm)
392{
393 if (gvec_fn == NULL) {
394 return false;
395 }
396 if (sve_access_check(s)) {
397 unsigned vsz = vec_full_reg_size(s);
398 gvec_fn(esz, vec_full_reg_offset(s, rd),
399 vec_full_reg_offset(s, rn), imm, vsz, vsz);
400 }
401 return true;
402}
403
ada378f0
RH
404static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
405 arg_rri_esz *a)
406{
407 if (a->esz < 0) {
408 /* Invalid tsz encoding -- see tszimm_esz. */
409 return false;
410 }
411 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
412}
413
39eea561 414/* Invoke a vector expander on three Zregs. */
50f6db5f 415static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
28c4da31 416 int esz, int rd, int rn, int rm)
38388f7e 417{
50f6db5f
RH
418 if (gvec_fn == NULL) {
419 return false;
420 }
421 if (sve_access_check(s)) {
422 unsigned vsz = vec_full_reg_size(s);
423 gvec_fn(esz, vec_full_reg_offset(s, rd),
424 vec_full_reg_offset(s, rn),
425 vec_full_reg_offset(s, rm), vsz, vsz);
426 }
427 return true;
38388f7e
RH
428}
429
cd54bbe6
RH
430static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
431 arg_rrr_esz *a)
432{
433 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
434}
435
911cdc6d 436/* Invoke a vector expander on four Zregs. */
189876af
RH
437static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
438 arg_rrrr_esz *a)
911cdc6d 439{
189876af
RH
440 if (gvec_fn == NULL) {
441 return false;
442 }
443 if (sve_access_check(s)) {
444 unsigned vsz = vec_full_reg_size(s);
445 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
446 vec_full_reg_offset(s, a->rn),
447 vec_full_reg_offset(s, a->rm),
448 vec_full_reg_offset(s, a->ra), vsz, vsz);
449 }
450 return true;
911cdc6d
RH
451}
452
39eea561
RH
453/* Invoke a vector move on two Zregs. */
454static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 455{
f7d79c41 456 if (sve_access_check(s)) {
5f730621
RH
457 unsigned vsz = vec_full_reg_size(s);
458 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
459 vec_full_reg_offset(s, rn), vsz, vsz);
f7d79c41
RH
460 }
461 return true;
38388f7e
RH
462}
463
d9d78dcc
RH
464/* Initialize a Zreg with replications of a 64-bit immediate. */
465static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
466{
467 unsigned vsz = vec_full_reg_size(s);
8711e71f 468 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
d9d78dcc
RH
469}
470
516e246a 471/* Invoke a vector expander on three Pregs. */
23e5fa5f 472static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
dd81a8d7 473 int rd, int rn, int rm)
516e246a 474{
23e5fa5f
RH
475 if (sve_access_check(s)) {
476 unsigned psz = pred_gvec_reg_size(s);
477 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
478 pred_full_reg_offset(s, rn),
479 pred_full_reg_offset(s, rm), psz, psz);
480 }
481 return true;
516e246a
RH
482}
483
484/* Invoke a vector move on two Pregs. */
485static bool do_mov_p(DisasContext *s, int rd, int rn)
486{
d0b2df5a
RH
487 if (sve_access_check(s)) {
488 unsigned psz = pred_gvec_reg_size(s);
489 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
490 pred_full_reg_offset(s, rn), psz, psz);
491 }
492 return true;
516e246a
RH
493}
494
9e18d7a6
RH
495/* Set the cpu flags as per a return from an SVE helper. */
496static void do_pred_flags(TCGv_i32 t)
497{
498 tcg_gen_mov_i32(cpu_NF, t);
499 tcg_gen_andi_i32(cpu_ZF, t, 2);
500 tcg_gen_andi_i32(cpu_CF, t, 1);
501 tcg_gen_movi_i32(cpu_VF, 0);
502}
503
504/* Subroutines computing the ARM PredTest psuedofunction. */
505static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
506{
507 TCGv_i32 t = tcg_temp_new_i32();
508
509 gen_helper_sve_predtest1(t, d, g);
510 do_pred_flags(t);
511 tcg_temp_free_i32(t);
512}
513
514static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
515{
516 TCGv_ptr dptr = tcg_temp_new_ptr();
517 TCGv_ptr gptr = tcg_temp_new_ptr();
392acacc 518 TCGv_i32 t = tcg_temp_new_i32();
9e18d7a6
RH
519
520 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
521 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
9e18d7a6 522
392acacc 523 gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
9e18d7a6
RH
524 tcg_temp_free_ptr(dptr);
525 tcg_temp_free_ptr(gptr);
526
527 do_pred_flags(t);
528 tcg_temp_free_i32(t);
529}
530
028e2a7b
RH
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull,      /* MO_8: every bit */
    0x5555555555555555ull,      /* MO_16: every 2nd bit */
    0x1111111111111111ull,      /* MO_32: every 4th bit */
    0x0101010101010101ull       /* MO_64: every 8th bit */
};
536
c437c59b
RH
537static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
538{
539 unallocated_encoding(s);
540 return true;
541}
542
39eea561
RH
543/*
544 *** SVE Logical - Unpredicated Group
545 */
546
b262215b
RH
547TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
548TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
549TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
550TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
d1822297 551
e6eba6e5
RH
552static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
553{
554 TCGv_i64 t = tcg_temp_new_i64();
555 uint64_t mask = dup_const(MO_8, 0xff >> sh);
556
557 tcg_gen_xor_i64(t, n, m);
558 tcg_gen_shri_i64(d, t, sh);
559 tcg_gen_shli_i64(t, t, 8 - sh);
560 tcg_gen_andi_i64(d, d, mask);
561 tcg_gen_andi_i64(t, t, ~mask);
562 tcg_gen_or_i64(d, d, t);
563 tcg_temp_free_i64(t);
564}
565
566static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
567{
568 TCGv_i64 t = tcg_temp_new_i64();
569 uint64_t mask = dup_const(MO_16, 0xffff >> sh);
570
571 tcg_gen_xor_i64(t, n, m);
572 tcg_gen_shri_i64(d, t, sh);
573 tcg_gen_shli_i64(t, t, 16 - sh);
574 tcg_gen_andi_i64(d, d, mask);
575 tcg_gen_andi_i64(t, t, ~mask);
576 tcg_gen_or_i64(d, d, t);
577 tcg_temp_free_i64(t);
578}
579
580static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
581{
582 tcg_gen_xor_i32(d, n, m);
583 tcg_gen_rotri_i32(d, d, sh);
584}
585
586static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
587{
588 tcg_gen_xor_i64(d, n, m);
589 tcg_gen_rotri_i64(d, d, sh);
590}
591
592static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
593 TCGv_vec m, int64_t sh)
594{
595 tcg_gen_xor_vec(vece, d, n, m);
596 tcg_gen_rotri_vec(vece, d, d, sh);
597}
598
599void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
600 uint32_t rm_ofs, int64_t shift,
601 uint32_t opr_sz, uint32_t max_sz)
602{
603 static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
604 static const GVecGen3i ops[4] = {
605 { .fni8 = gen_xar8_i64,
606 .fniv = gen_xar_vec,
607 .fno = gen_helper_sve2_xar_b,
608 .opt_opc = vecop,
609 .vece = MO_8 },
610 { .fni8 = gen_xar16_i64,
611 .fniv = gen_xar_vec,
612 .fno = gen_helper_sve2_xar_h,
613 .opt_opc = vecop,
614 .vece = MO_16 },
615 { .fni4 = gen_xar_i32,
616 .fniv = gen_xar_vec,
617 .fno = gen_helper_sve2_xar_s,
618 .opt_opc = vecop,
619 .vece = MO_32 },
620 { .fni8 = gen_xar_i64,
621 .fniv = gen_xar_vec,
622 .fno = gen_helper_gvec_xar_d,
623 .opt_opc = vecop,
624 .vece = MO_64 }
625 };
626 int esize = 8 << vece;
627
628 /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
629 tcg_debug_assert(shift >= 0);
630 tcg_debug_assert(shift <= esize);
631 shift &= esize - 1;
632
633 if (shift == 0) {
634 /* xar with no rotate devolves to xor. */
635 tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
636 } else {
637 tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
638 shift, &ops[vece]);
639 }
640}
641
642static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
643{
644 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
645 return false;
646 }
647 if (sve_access_check(s)) {
648 unsigned vsz = vec_full_reg_size(s);
649 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
650 vec_full_reg_offset(s, a->rn),
651 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
652 }
653 return true;
654}
655
911cdc6d
RH
656static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
657{
658 tcg_gen_xor_i64(d, n, m);
659 tcg_gen_xor_i64(d, d, k);
660}
661
662static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
663 TCGv_vec m, TCGv_vec k)
664{
665 tcg_gen_xor_vec(vece, d, n, m);
666 tcg_gen_xor_vec(vece, d, d, k);
667}
668
669static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
670 uint32_t a, uint32_t oprsz, uint32_t maxsz)
671{
672 static const GVecGen4 op = {
673 .fni8 = gen_eor3_i64,
674 .fniv = gen_eor3_vec,
675 .fno = gen_helper_sve2_eor3,
676 .vece = MO_64,
677 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
678 };
679 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
680}
681
b773a5c8 682TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)
911cdc6d
RH
683
684static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
685{
686 tcg_gen_andc_i64(d, m, k);
687 tcg_gen_xor_i64(d, d, n);
688}
689
690static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
691 TCGv_vec m, TCGv_vec k)
692{
693 tcg_gen_andc_vec(vece, d, m, k);
694 tcg_gen_xor_vec(vece, d, d, n);
695}
696
697static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
698 uint32_t a, uint32_t oprsz, uint32_t maxsz)
699{
700 static const GVecGen4 op = {
701 .fni8 = gen_bcax_i64,
702 .fniv = gen_bcax_vec,
703 .fno = gen_helper_sve2_bcax,
704 .vece = MO_64,
705 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
706 };
707 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
708}
709
b773a5c8 710TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)
911cdc6d
RH
711
712static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
713 uint32_t a, uint32_t oprsz, uint32_t maxsz)
714{
715 /* BSL differs from the generic bitsel in argument ordering. */
716 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
717}
718
b773a5c8 719TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
911cdc6d
RH
720
721static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
722{
723 tcg_gen_andc_i64(n, k, n);
724 tcg_gen_andc_i64(m, m, k);
725 tcg_gen_or_i64(d, n, m);
726}
727
728static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
729 TCGv_vec m, TCGv_vec k)
730{
731 if (TCG_TARGET_HAS_bitsel_vec) {
732 tcg_gen_not_vec(vece, n, n);
733 tcg_gen_bitsel_vec(vece, d, k, n, m);
734 } else {
735 tcg_gen_andc_vec(vece, n, k, n);
736 tcg_gen_andc_vec(vece, m, m, k);
737 tcg_gen_or_vec(vece, d, n, m);
738 }
739}
740
741static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
742 uint32_t a, uint32_t oprsz, uint32_t maxsz)
743{
744 static const GVecGen4 op = {
745 .fni8 = gen_bsl1n_i64,
746 .fniv = gen_bsl1n_vec,
747 .fno = gen_helper_sve2_bsl1n,
748 .vece = MO_64,
749 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
750 };
751 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
752}
753
b773a5c8 754TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
911cdc6d
RH
755
756static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
757{
758 /*
759 * Z[dn] = (n & k) | (~m & ~k)
760 * = | ~(m | k)
761 */
762 tcg_gen_and_i64(n, n, k);
763 if (TCG_TARGET_HAS_orc_i64) {
764 tcg_gen_or_i64(m, m, k);
765 tcg_gen_orc_i64(d, n, m);
766 } else {
767 tcg_gen_nor_i64(m, m, k);
768 tcg_gen_or_i64(d, n, m);
769 }
770}
771
772static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
773 TCGv_vec m, TCGv_vec k)
774{
775 if (TCG_TARGET_HAS_bitsel_vec) {
776 tcg_gen_not_vec(vece, m, m);
777 tcg_gen_bitsel_vec(vece, d, k, n, m);
778 } else {
779 tcg_gen_and_vec(vece, n, n, k);
780 tcg_gen_or_vec(vece, m, m, k);
781 tcg_gen_orc_vec(vece, d, n, m);
782 }
783}
784
785static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
786 uint32_t a, uint32_t oprsz, uint32_t maxsz)
787{
788 static const GVecGen4 op = {
789 .fni8 = gen_bsl2n_i64,
790 .fniv = gen_bsl2n_vec,
791 .fno = gen_helper_sve2_bsl2n,
792 .vece = MO_64,
793 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
794 };
795 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
796}
797
b773a5c8 798TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
911cdc6d
RH
799
800static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
801{
802 tcg_gen_and_i64(n, n, k);
803 tcg_gen_andc_i64(m, m, k);
804 tcg_gen_nor_i64(d, n, m);
805}
806
807static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
808 TCGv_vec m, TCGv_vec k)
809{
810 tcg_gen_bitsel_vec(vece, d, k, n, m);
811 tcg_gen_not_vec(vece, d, d);
812}
813
814static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
815 uint32_t a, uint32_t oprsz, uint32_t maxsz)
816{
817 static const GVecGen4 op = {
818 .fni8 = gen_nbsl_i64,
819 .fniv = gen_nbsl_vec,
820 .fno = gen_helper_sve2_nbsl,
821 .vece = MO_64,
822 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
823 };
824 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
825}
826
b773a5c8 827TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
911cdc6d 828
fea98f9c
RH
829/*
830 *** SVE Integer Arithmetic - Unpredicated Group
831 */
832
b262215b
RH
833TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
834TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
835TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
836TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
837TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
838TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
fea98f9c 839
f97cfd59
RH
840/*
841 *** SVE Integer Arithmetic - Binary Predicated Group
842 */
843
a2103582
RH
844/* Select active elememnts from Zn and inactive elements from Zm,
845 * storing the result in Zd.
846 */
68cc4ee3 847static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
a2103582
RH
848{
849 static gen_helper_gvec_4 * const fns[4] = {
850 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
851 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
852 };
68cc4ee3 853 return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
a2103582
RH
854}
855
8e7acb24
RH
856#define DO_ZPZZ(NAME, FEAT, name) \
857 static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \
858 gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \
859 gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \
f97cfd59 860 }; \
8e7acb24
RH
861 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \
862 name##_zpzz_fns[a->esz], a, 0)
863
864DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
865DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
866DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
867DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)
868
869DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
870DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)
871
872DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
873DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
874DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
875DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
876DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
877DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)
878
879DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
880DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
881DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)
882
883DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
884DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
885DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)
886
887static gen_helper_gvec_4 * const sdiv_fns[4] = {
888 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
889};
890TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)
f97cfd59 891
8e7acb24
RH
892static gen_helper_gvec_4 * const udiv_fns[4] = {
893 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
894};
895TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)
f97cfd59 896
29693f5f 897TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
d3fe4a29 898
afac6d04
RH
899/*
900 *** SVE Integer Arithmetic - Unary Predicated Group
901 */
902
817bd5c9
RH
903#define DO_ZPZ(NAME, FEAT, name) \
904 static gen_helper_gvec_3 * const name##_fns[4] = { \
905 gen_helper_##name##_b, gen_helper_##name##_h, \
906 gen_helper_##name##_s, gen_helper_##name##_d, \
afac6d04 907 }; \
817bd5c9
RH
908 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)
909
910DO_ZPZ(CLS, aa64_sve, sve_cls)
911DO_ZPZ(CLZ, aa64_sve, sve_clz)
912DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
913DO_ZPZ(CNOT, aa64_sve, sve_cnot)
914DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
915DO_ZPZ(ABS, aa64_sve, sve_abs)
916DO_ZPZ(NEG, aa64_sve, sve_neg)
917DO_ZPZ(RBIT, aa64_sve, sve_rbit)
918
919static gen_helper_gvec_3 * const fabs_fns[4] = {
920 NULL, gen_helper_sve_fabs_h,
921 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
922};
923TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
afac6d04 924
817bd5c9
RH
925static gen_helper_gvec_3 * const fneg_fns[4] = {
926 NULL, gen_helper_sve_fneg_h,
927 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
928};
929TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
afac6d04 930
817bd5c9
RH
931static gen_helper_gvec_3 * const sxtb_fns[4] = {
932 NULL, gen_helper_sve_sxtb_h,
933 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
934};
935TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)
afac6d04 936
817bd5c9
RH
937static gen_helper_gvec_3 * const uxtb_fns[4] = {
938 NULL, gen_helper_sve_uxtb_h,
939 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
940};
941TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)
afac6d04 942
817bd5c9
RH
943static gen_helper_gvec_3 * const sxth_fns[4] = {
944 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
945};
946TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)
afac6d04 947
817bd5c9
RH
948static gen_helper_gvec_3 * const uxth_fns[4] = {
949 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
950};
951TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)
afac6d04 952
817bd5c9
RH
953TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
954 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
955TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
956 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
afac6d04 957
047cec97
RH
958/*
959 *** SVE Integer Reduction Group
960 */
961
962typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
963static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
964 gen_helper_gvec_reduc *fn)
965{
966 unsigned vsz = vec_full_reg_size(s);
967 TCGv_ptr t_zn, t_pg;
968 TCGv_i32 desc;
969 TCGv_i64 temp;
970
971 if (fn == NULL) {
972 return false;
973 }
974 if (!sve_access_check(s)) {
975 return true;
976 }
977
c6a59b55 978 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
047cec97
RH
979 temp = tcg_temp_new_i64();
980 t_zn = tcg_temp_new_ptr();
981 t_pg = tcg_temp_new_ptr();
982
983 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
984 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
985 fn(temp, t_zn, t_pg, desc);
986 tcg_temp_free_ptr(t_zn);
987 tcg_temp_free_ptr(t_pg);
047cec97
RH
988
989 write_fp_dreg(s, a->rd, temp);
990 tcg_temp_free_i64(temp);
991 return true;
992}
993
/*
 * Declare the per-element-size helper table for a reduction and bind
 * it to the translator via do_vpz_ool.
 */
#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/*
 * SADDV widens to 64 bits, so a 64-bit element size is not encodable;
 * the NULL entry makes do_vpz_ool reject it.
 */
static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ
1018
ccd841c3
RH
1019/*
1020 *** SVE Shift by Immediate - Predicated Group
1021 */
1022
60245996
RH
1023/*
1024 * Copy Zn into Zd, storing zeros into inactive elements.
1025 * If invert, store zeros into the active elements.
ccd841c3 1026 */
60245996
RH
1027static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
1028 int esz, bool invert)
ccd841c3 1029{
60245996
RH
1030 static gen_helper_gvec_3 * const fns[4] = {
1031 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
1032 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
ccd841c3 1033 };
8fb27a21 1034 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
ccd841c3
RH
1035}
1036
73c558a8
RH
1037static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
1038 gen_helper_gvec_3 * const fns[4])
ccd841c3 1039{
73c558a8
RH
1040 int max;
1041
ccd841c3
RH
1042 if (a->esz < 0) {
1043 /* Invalid tsz encoding -- see tszimm_esz. */
1044 return false;
1045 }
73c558a8
RH
1046
1047 /*
1048 * Shift by element size is architecturally valid.
1049 * For arithmetic right-shift, it's the same as by one less.
1050 * For logical shifts and ASRD, it is a zeroing operation.
1051 */
1052 max = 8 << a->esz;
1053 if (a->imm >= max) {
1054 if (asr) {
1055 a->imm = max - 1;
1056 } else {
1057 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
1058 }
1059 }
afa2529c 1060 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
1061}
1062
5cccd1f1
RH
/* Per-element-size helper tables for the predicated immediate shifts. */
static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

/*
 * SVE2 saturating/rounding shifts go straight to gen_gvec_ool_arg_zpzi;
 * the esz < 0 guard rejects invalid tsz encodings (see tszimm_esz).
 */
static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
a5421b54 1121
fe7f8dfb
RH
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/*
 * Predicated shift by a wide (64-bit) shift-count vector.  Not encodable
 * for 64-bit elements, hence the NULL in the MO_64 slot.
 */
#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {               \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s, NULL                              \
    };                                                                    \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,              \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1139
d9d78dcc
RH
1140/*
1141 *** SVE Bitwise Shift - Unpredicated Group
1142 */
1143
1144static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
1145 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
1146 int64_t, uint32_t, uint32_t))
1147{
1148 if (a->esz < 0) {
1149 /* Invalid tsz encoding -- see tszimm_esz. */
1150 return false;
1151 }
1152 if (sve_access_check(s)) {
1153 unsigned vsz = vec_full_reg_size(s);
1154 /* Shift by element size is architecturally valid. For
1155 arithmetic right-shift, it's the same as by one less.
1156 Otherwise it is a zeroing operation. */
1157 if (a->imm >= 8 << a->esz) {
1158 if (asr) {
1159 a->imm = (8 << a->esz) - 1;
1160 } else {
1161 do_dupi_z(s, a->rd, 0);
1162 return true;
1163 }
1164 }
1165 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
1166 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
1167 }
1168 return true;
1169}
1170
5e612f80
RH
/* Unpredicated shifts by immediate, expanded inline via gvec. */
TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)
d9d78dcc 1174
/*
 * Unpredicated shift by a wide (64-bit) shift-count vector; 64-bit
 * elements are not encodable (NULL in the MO_64 slot).
 */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {                \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                      \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1188
96a36e4a
RH
1189/*
1190 *** SVE Integer Multiply-Add Group
1191 */
1192
1193static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1194 gen_helper_gvec_5 *fn)
1195{
1196 if (sve_access_check(s)) {
1197 unsigned vsz = vec_full_reg_size(s);
1198 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1199 vec_full_reg_offset(s, a->ra),
1200 vec_full_reg_offset(s, a->rn),
1201 vec_full_reg_offset(s, a->rm),
1202 pred_full_reg_offset(s, a->pg),
1203 vsz, vsz, 0, fn);
1204 }
1205 return true;
1206}
1207
dc67e645
RH
/* MLA/MLS: predicated multiply-accumulate / multiply-subtract. */
static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
96a36e4a 1219
9a56c9c3
RH
/*
 *** SVE Index Generation Group
 */

/*
 * Expand INDEX: Zd[i] = start + i * incr.  The 64-bit helper takes the
 * operands as i64 directly; the narrower helpers take i32, so the
 * operands are truncated first.
 */
static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* Truncate the 64-bit operands for the 8/16/32-bit helpers. */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    return true;
}
1262
9aa60c83
RH
/* The four INDEX forms: each operand is either an immediate or a Xreg. */
TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
9a56c9c3 1271
96f922cc
RH
1272/*
1273 *** SVE Stack Allocation Group
1274 */
1275
3a7be554 1276static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1277{
1402a6b8
RH
1278 if (!dc_isar_feature(aa64_sve, s)) {
1279 return false;
1280 }
5de56742
AC
1281 if (sve_access_check(s)) {
1282 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1283 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1284 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1285 }
96f922cc
RH
1286 return true;
1287}
1288
3a7be554 1289static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1290{
1402a6b8
RH
1291 if (!dc_isar_feature(aa64_sve, s)) {
1292 return false;
1293 }
5de56742
AC
1294 if (sve_access_check(s)) {
1295 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1296 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1297 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1298 }
96f922cc
RH
1299 return true;
1300}
1301
3a7be554 1302static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1303{
1402a6b8
RH
1304 if (!dc_isar_feature(aa64_sve, s)) {
1305 return false;
1306 }
5de56742
AC
1307 if (sve_access_check(s)) {
1308 TCGv_i64 reg = cpu_reg(s, a->rd);
1309 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1310 }
96f922cc
RH
1311 return true;
1312}
1313
4b242d9c
RH
/*
 *** SVE Compute Vector Address Group
 */

/* ADR: per-element address computation, expanded out of line.
 * The shift amount a->imm is passed via the descriptor data. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

/* Non-streaming: ADR is illegal in SME streaming mode. */
TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
4b242d9c 1327
0762cd42
RH
/*
 *** SVE Integer Misc - Unpredicated Group
 */

/* FEXPA: FP exponent acceleration; no 8-bit form (NULL slot). */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
                        fexpa_fns[a->esz], a->rd, a->rn, 0)

/* FTSSEL: FP trig select coefficient; no 8-bit form. */
static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
                        ftssel_fns[a->esz], a, 0)
a1f233f2 1345
516e246a
RH
/*
 *** SVE Predicate Logical Operations Group
 */

/*
 * Expand a predicate logical operation, optionally setting the NZCV
 * flags (a->s) from a predicate test of the result against the
 * governing predicate.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        /* No flags: just expand the operation. */
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1405
1406static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1407{
1408 tcg_gen_and_i64(pd, pn, pm);
1409 tcg_gen_and_i64(pd, pd, pg);
1410}
1411
1412static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1413 TCGv_vec pm, TCGv_vec pg)
1414{
1415 tcg_gen_and_vec(vece, pd, pn, pm);
1416 tcg_gen_and_vec(vece, pd, pd, pg);
1417}
1418
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s) {
        /* Flag-free special cases that reduce to a move or a plain AND. */
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                /* AND Pd, Pg, Pn, Pn with Pg == Pn is MOV. */
                return do_mov_p(s, a->rd, a->rn);
            }
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}
1443
1444static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1445{
1446 tcg_gen_andc_i64(pd, pn, pm);
1447 tcg_gen_and_i64(pd, pd, pg);
1448}
1449
1450static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1451 TCGv_vec pm, TCGv_vec pg)
1452{
1453 tcg_gen_andc_vec(vece, pd, pn, pm);
1454 tcg_gen_and_vec(vece, pd, pd, pg);
1455}
1456
3a7be554 1457static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1458{
1459 static const GVecGen4 op = {
1460 .fni8 = gen_bic_pg_i64,
1461 .fniv = gen_bic_pg_vec,
1462 .fno = gen_helper_sve_bic_pppp,
1463 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1464 };
dd81a8d7 1465
1402a6b8
RH
1466 if (!dc_isar_feature(aa64_sve, s)) {
1467 return false;
1468 }
dd81a8d7 1469 if (!a->s && a->pg == a->rn) {
23e5fa5f 1470 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
516e246a 1471 }
dd81a8d7 1472 return do_pppp_flags(s, a, &op);
516e246a
RH
1473}
1474
1475static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1476{
1477 tcg_gen_xor_i64(pd, pn, pm);
1478 tcg_gen_and_i64(pd, pd, pg);
1479}
1480
1481static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1482 TCGv_vec pm, TCGv_vec pg)
1483{
1484 tcg_gen_xor_vec(vece, pd, pn, pm);
1485 tcg_gen_and_vec(vece, pd, pd, pg);
1486}
1487
3a7be554 1488static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1489{
1490 static const GVecGen4 op = {
1491 .fni8 = gen_eor_pg_i64,
1492 .fniv = gen_eor_pg_vec,
1493 .fno = gen_helper_sve_eor_pppp,
1494 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1495 };
738b679c 1496
1402a6b8
RH
1497 if (!dc_isar_feature(aa64_sve, s)) {
1498 return false;
1499 }
738b679c
RH
1500 /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
1501 if (!a->s && a->pg == a->rm) {
1502 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
1503 }
dd81a8d7 1504 return do_pppp_flags(s, a, &op);
516e246a
RH
1505}
1506
3a7be554 1507static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1508{
1402a6b8 1509 if (a->s || !dc_isar_feature(aa64_sve, s)) {
516e246a 1510 return false;
516e246a 1511 }
d4bc6232
RH
1512 if (sve_access_check(s)) {
1513 unsigned psz = pred_gvec_reg_size(s);
1514 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1515 pred_full_reg_offset(s, a->pg),
1516 pred_full_reg_offset(s, a->rn),
1517 pred_full_reg_offset(s, a->rm), psz, psz);
1518 }
1519 return true;
516e246a
RH
1520}
1521
1522static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1523{
1524 tcg_gen_or_i64(pd, pn, pm);
1525 tcg_gen_and_i64(pd, pd, pg);
1526}
1527
1528static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1529 TCGv_vec pm, TCGv_vec pg)
1530{
1531 tcg_gen_or_vec(vece, pd, pn, pm);
1532 tcg_gen_and_vec(vece, pd, pd, pg);
1533}
1534
3a7be554 1535static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1536{
1537 static const GVecGen4 op = {
1538 .fni8 = gen_orr_pg_i64,
1539 .fniv = gen_orr_pg_vec,
1540 .fno = gen_helper_sve_orr_pppp,
1541 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1542 };
dd81a8d7 1543
1402a6b8
RH
1544 if (!dc_isar_feature(aa64_sve, s)) {
1545 return false;
1546 }
dd81a8d7 1547 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
516e246a 1548 return do_mov_p(s, a->rd, a->rn);
516e246a 1549 }
dd81a8d7 1550 return do_pppp_flags(s, a, &op);
516e246a
RH
1551}
1552
1553static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1554{
1555 tcg_gen_orc_i64(pd, pn, pm);
1556 tcg_gen_and_i64(pd, pd, pg);
1557}
1558
1559static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1560 TCGv_vec pm, TCGv_vec pg)
1561{
1562 tcg_gen_orc_vec(vece, pd, pn, pm);
1563 tcg_gen_and_vec(vece, pd, pd, pg);
1564}
1565
3a7be554 1566static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1567{
1568 static const GVecGen4 op = {
1569 .fni8 = gen_orn_pg_i64,
1570 .fniv = gen_orn_pg_vec,
1571 .fno = gen_helper_sve_orn_pppp,
1572 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1573 };
1402a6b8
RH
1574
1575 if (!dc_isar_feature(aa64_sve, s)) {
1576 return false;
1577 }
dd81a8d7 1578 return do_pppp_flags(s, a, &op);
516e246a
RH
1579}
1580
1581static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1582{
1583 tcg_gen_or_i64(pd, pn, pm);
1584 tcg_gen_andc_i64(pd, pg, pd);
1585}
1586
1587static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1588 TCGv_vec pm, TCGv_vec pg)
1589{
1590 tcg_gen_or_vec(vece, pd, pn, pm);
1591 tcg_gen_andc_vec(vece, pd, pg, pd);
1592}
1593
3a7be554 1594static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1595{
1596 static const GVecGen4 op = {
1597 .fni8 = gen_nor_pg_i64,
1598 .fniv = gen_nor_pg_vec,
1599 .fno = gen_helper_sve_nor_pppp,
1600 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1601 };
1402a6b8
RH
1602
1603 if (!dc_isar_feature(aa64_sve, s)) {
1604 return false;
1605 }
dd81a8d7 1606 return do_pppp_flags(s, a, &op);
516e246a
RH
1607}
1608
1609static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1610{
1611 tcg_gen_and_i64(pd, pn, pm);
1612 tcg_gen_andc_i64(pd, pg, pd);
1613}
1614
1615static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1616 TCGv_vec pm, TCGv_vec pg)
1617{
1618 tcg_gen_and_vec(vece, pd, pn, pm);
1619 tcg_gen_andc_vec(vece, pd, pg, pd);
1620}
1621
3a7be554 1622static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1623{
1624 static const GVecGen4 op = {
1625 .fni8 = gen_nand_pg_i64,
1626 .fniv = gen_nand_pg_vec,
1627 .fno = gen_helper_sve_nand_pppp,
1628 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1629 };
1402a6b8
RH
1630
1631 if (!dc_isar_feature(aa64_sve, s)) {
1632 return false;
1633 }
dd81a8d7 1634 return do_pppp_flags(s, a, &op);
516e246a
RH
1635}
1636
9e18d7a6
RH
/*
 *** SVE Predicate Misc Group
 */

/* PTEST: set NZCV from a predicate test of Pn against Pg. */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Single 64-bit word: test inline. */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            /* Multi-word predicates go through the out-of-line helper. */
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1667
028e2a7b
RH
/* See the ARM pseudocode DecodePredCount. */
/*
 * Returns the number of active elements implied by a predicate pattern,
 * for a vector of fullsz bytes and element size esz; 0 when the vector
 * is too short for the requested pattern or the pattern is reserved.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        /* For VL1-VL8 the pattern value is the element count itself. */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    /* Fixed-VL patterns yield zero if the vector is too short. */
    return elements >= bound ? bound : 0;
}
1705
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* The final partial word keeps only the low setsz%64 bits. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* All stored words identical: use a gvec dup when alignment
         * requirements allow it.
         */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Store the full words, then the partial word, then zeros. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1785
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
                        do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
028e2a7b 1794
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };

    /* FFR is not present in SME streaming mode. */
    s->is_nonstreaming = true;
    return trans_AND_pppp(s, &alt_a);
}

/* Unpredicated moves to/from FFR; likewise non-streaming only. */
TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
028e2a7b
RH
1811
/*
 * Common expansion for PFIRST/PNEXT: the helper updates Pd in place
 * and returns the NZCV flags packed in a word, which do_pred_flags
 * unpacks into the flag registers.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    /* Pack predicate size and element size into the descriptor. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
028e2a7b 1843
24e82e68
RH
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtraction can only underflow: clamp from below. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Addition can only overflow: clamp from above. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1872
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: result is 0 when reg < val. */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: result is UINT64_MAX on carry-out,
             * detected by sum < addend.
             */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1918
/* Similarly with a vector and a scalar operand.  */
/*
 * Expand a saturating add/subtract of scalar `val` to every element of
 * Zn, writing Zd.  Subtraction is implemented by negating val and using
 * the saturating-add helpers, except for unsigned 64-bit which has a
 * dedicated subtract helper.
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                /* Unsigned 64-bit cannot negate; use the subtract helper. */
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
2002
3a7be554 2003static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68 2004{
1402a6b8
RH
2005 if (!dc_isar_feature(aa64_sve, s)) {
2006 return false;
2007 }
24e82e68
RH
2008 if (sve_access_check(s)) {
2009 unsigned fullsz = vec_full_reg_size(s);
2010 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2011 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2012 }
2013 return true;
2014}
2015
3a7be554 2016static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68 2017{
1402a6b8
RH
2018 if (!dc_isar_feature(aa64_sve, s)) {
2019 return false;
2020 }
24e82e68
RH
2021 if (sve_access_check(s)) {
2022 unsigned fullsz = vec_full_reg_size(s);
2023 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2024 int inc = numelem * a->imm * (a->d ? -1 : 1);
2025 TCGv_i64 reg = cpu_reg(s, a->rd);
2026
2027 tcg_gen_addi_i64(reg, reg, inc);
2028 }
2029 return true;
2030}
2031
/*
 * SQINC/SQDEC/UQINC/UQDEC (scalar, 32-bit): saturating increment or
 * decrement of a W register by (element count * imm).
 */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        /* Zero increment: only the 32-bit extension of the result remains. */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
2058
/*
 * SQINC/SQDEC/UQINC/UQDEC (scalar, 64-bit): saturating increment or
 * decrement of an X register by (element count * imm).
 */
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* A zero increment leaves the full 64-bit register unchanged. */
    if (inc != 0) {
        do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
2078
/*
 * INC/DEC (vector): add or subtract (element count * imm) to every
 * element of the Z register.  Byte elements (esz == 0) are not valid
 * for the vector form.
 */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        /* Zero increment degenerates to a plain register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
2101
/*
 * SQINC/SQDEC/UQINC/UQDEC (vector): saturating per-element increment or
 * decrement by (element count * imm).  Byte elements are not valid.
 */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        /* Zero increment degenerates to a plain register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
2122
/*
 *** SVE Bitwise Immediate Group
 */

/*
 * Decode the shared "dbm" logical-immediate encoding and apply GVEC_FN
 * to Zd = Zn op imm.  Returns false (undefined insn) if the bitmask
 * encoding is invalid.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
}
2137
/* Bitwise AND/ORR/EOR of a Z register with a logical immediate. */
TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
3a7be554 2142static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2143{
2144 uint64_t imm;
1402a6b8
RH
2145
2146 if (!dc_isar_feature(aa64_sve, s)) {
2147 return false;
2148 }
e1fa1164
RH
2149 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2150 extract32(a->dbm, 0, 6),
2151 extract32(a->dbm, 6, 6))) {
2152 return false;
2153 }
2154 if (sve_access_check(s)) {
2155 do_dupi_z(s, a->rd, imm);
2156 }
2157 return true;
2158}
2159
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies. This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    /* Pass env-relative pointers for Zd, Zn and the governing predicate. */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
}
2191
3a7be554 2192static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361 2193{
1402a6b8 2194 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
f25a2361
RH
2195 return false;
2196 }
2197 if (sve_access_check(s)) {
2198 /* Decode the VFP immediate. */
2199 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
e152b48b 2200 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
f25a2361
RH
2201 }
2202 return true;
2203}
2204
3a7be554 2205static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 2206{
1402a6b8
RH
2207 if (!dc_isar_feature(aa64_sve, s)) {
2208 return false;
2209 }
f25a2361 2210 if (sve_access_check(s)) {
e152b48b 2211 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
f25a2361
RH
2212 }
2213 return true;
2214}
2215
/*
 * CPY (immediate, zeroing): active elements get imm, inactive elements
 * are cleared.
 */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2235
/*
 *** SVE Permute Extract Group
 */

/*
 * EXT: concatenate Zn:Zm and extract a vector starting at byte IMM.
 * An out-of-range IMM behaves as offset 0 (whole of Zn).
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fallback: out-of-line helper handles any overlap/offset. */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2269
/* SVE2 EXT uses the implicit register pair {Zn, Zn+1} as the source. */
TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
30562ab7
RH
2273/*
2274 *** SVE Permute - Unpredicated Group
2275 */
2276
3a7be554 2277static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7 2278{
1402a6b8
RH
2279 if (!dc_isar_feature(aa64_sve, s)) {
2280 return false;
2281 }
30562ab7
RH
2282 if (sve_access_check(s)) {
2283 unsigned vsz = vec_full_reg_size(s);
2284 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2285 vsz, vsz, cpu_reg_sp(s, a->rn));
2286 }
2287 return true;
2288}
2289
/*
 * DUP (element): broadcast element [index] of Zn, where the element size
 * and index are packed into a->imm (imm7 encoding).
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* imm low bits encode the size; all-zero low 5 bits is invalid. */
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* Lowest set bit of imm determines the element size. */
        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2319
/*
 * INSR: shift Zn up by one element and insert VAL at element 0,
 * via the per-element-size out-of-line helpers.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
}
2340
3a7be554 2341static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7 2342{
1402a6b8
RH
2343 if (!dc_isar_feature(aa64_sve, s)) {
2344 return false;
2345 }
30562ab7
RH
2346 if (sve_access_check(s)) {
2347 TCGv_i64 t = tcg_temp_new_i64();
2348 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2349 do_insr_i64(s, a, t);
2350 tcg_temp_free_i64(t);
2351 }
2352 return true;
2353}
2354
3a7be554 2355static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7 2356{
1402a6b8
RH
2357 if (!dc_isar_feature(aa64_sve, s)) {
2358 return false;
2359 }
30562ab7
RH
2360 if (sve_access_check(s)) {
2361 do_insr_i64(s, a, cpu_reg(s, a->rm));
2362 }
2363 return true;
2364}
2365
/* REV (vector): reverse element order within the whole vector. */
static gen_helper_gvec_2 * const rev_fns[4] = {
    gen_helper_sve_rev_b, gen_helper_sve_rev_h,
    gen_helper_sve_rev_s, gen_helper_sve_rev_d
};
TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)

/* TBL: table lookup, out-of-range indices produce zero. */
static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
    gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
    gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
};
TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)

/* SVE2 TBL: two-source table lookup from the pair {Zn, Zn+1}. */
static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
    gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
    gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
};
TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
           a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)

/* SVE2 TBX: table lookup leaving out-of-range destination elements. */
static gen_helper_gvec_3 * const tbx_fns[4] = {
    gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
    gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
};
TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
80a712a2 2390
/*
 * SUNPK/UUNPK (lo/hi): widen the low or high half of Zn into Zd,
 * sign- or zero-extending.  The byte destination size is invalid.
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The "hi" form reads from the second half of the source. */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2412
/*
 *** SVE Permute - Predicates Group
 */

/*
 * Three-operand predicate permute (ZIP/UZP/TRN on P registers).
 * HIGH_ODD selects the 1/2 variant and is passed in the descriptor.
 * Predicate sizes may be smaller than simd_desc allows, so the
 * descriptor is built with the PREDDESC fields instead.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    uint32_t desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));

    fn(t_d, t_n, t_m, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    return true;
}
2446
/*
 * Two-operand predicate permute (REV/PUNPKLO/PUNPKHI on P registers).
 * Same PREDDESC descriptor scheme as do_perm_pred3.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(t_d, t_n, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2472
/* Predicate interleave/deinterleave/transpose and unpack forms. */
TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)

TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
d731d8cb 2483
/*
 *** SVE Permute - Interleaving Group
 */

/* ZIP1/ZIP2: interleave elements from the low/high halves of Zn and Zm. */
static gen_helper_gvec_3 * const zip_fns[4] = {
    gen_helper_sve_zip_b, gen_helper_sve_zip_h,
    gen_helper_sve_zip_s, gen_helper_sve_zip_d,
};
TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           zip_fns[a->esz], a, 0)
TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           zip_fns[a->esz], a, vec_full_reg_size(s) / 2)

/* F64MM quadword (128-bit element) zip variants. */
TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_zip_q, a, 0)
TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_zip_q, a,
           QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)

/* UZP1/UZP2: concatenate even/odd elements of Zn:Zm. */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 0)
TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 16)

/* TRN1/TRN2: transpose even/odd element pairs of Zn and Zm. */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 0)
TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 0)
TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 16)
74b64b25 2532
/*
 *** SVE Permute Vector - Predicated Group
 */

/* COMPACT: pack active elements to the low end; word/dword only. */
static gen_helper_gvec_3 * const compact_fns[4] = {
    NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
};
TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
                        compact_fns[a->esz], a, 0)
3ca879ae 2542
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size. This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_p);
}
2564
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two size: wrap with a mask. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise wrap with a conditional move: last >= vsz -> 0. */
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}
2581
/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* Power-of-two size: masking maps -1 to the last element. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select the last element offset when last < 0. */
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}
2595
2596/* Load an unsigned element of ESZ from BASE+OFS. */
2597static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2598{
2599 TCGv_i64 r = tcg_temp_new_i64();
2600
2601 switch (esz) {
2602 case 0:
2603 tcg_gen_ld8u_i64(r, base, ofs);
2604 break;
2605 case 1:
2606 tcg_gen_ld16u_i64(r, base, ofs);
2607 break;
2608 case 2:
2609 tcg_gen_ld32u_i64(r, base, ofs);
2610 break;
2611 case 3:
2612 tcg_gen_ld_i64(r, base, ofs);
2613 break;
2614 default:
2615 g_assert_not_reached();
2616 }
2617 return r;
2618}
2619
/* Load an unsigned element of ESZ from RM[LAST]. */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering. See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2645
/* Compute CLAST for a Zreg. */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Local temp: live across the branch below. */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        /* No active element: Zd takes the value of Zn. */
        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
ef23cb72
RH
2696
2697/* Compute CLAST for a scalar. */
2698static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2699 bool before, TCGv_i64 reg_val)
2700{
2701 TCGv_i32 last = tcg_temp_new_i32();
053552d3 2702 TCGv_i64 ele, cmp;
ef23cb72
RH
2703
2704 find_last_active(s, last, esz, pg);
2705
2706 /* Extend the original value of last prior to incrementing. */
2707 cmp = tcg_temp_new_i64();
2708 tcg_gen_ext_i32_i64(cmp, last);
2709
2710 if (!before) {
2711 incr_last_active(s, last, esz);
2712 }
2713
2714 /* The conceit here is that while last < 0 indicates not found, after
2715 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2716 * from which we can load garbage. We then discard the garbage with
2717 * a conditional move.
2718 */
2719 ele = load_last_active(s, last, rm, esz);
2720 tcg_temp_free_i32(last);
2721
053552d3
RH
2722 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2723 ele, reg_val);
ef23cb72 2724
ef23cb72
RH
2725 tcg_temp_free_i64(cmp);
2726 tcg_temp_free_i64(ele);
2727}
2728
/* Compute CLAST for a Vreg. */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        /* Start from the current element 0 of Vd (the fallback value). */
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
ef23cb72
RH
2746
2747/* Compute CLAST for a Xreg. */
2748static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2749{
2750 TCGv_i64 reg;
2751
2752 if (!sve_access_check(s)) {
2753 return true;
2754 }
2755
2756 reg = cpu_reg(s, a->rd);
2757 switch (a->esz) {
2758 case 0:
2759 tcg_gen_ext8u_i64(reg, reg);
2760 break;
2761 case 1:
2762 tcg_gen_ext16u_i64(reg, reg);
2763 break;
2764 case 2:
2765 tcg_gen_ext32u_i64(reg, reg);
2766 break;
2767 case 3:
2768 break;
2769 default:
2770 g_assert_not_reached();
2771 }
2772
2773 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2774 return true;
2775}
2776
c673404a
RH
2777TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2778TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
ef23cb72
RH
2779
/* Compute LAST for a scalar. */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        /* LASTB: wrap "not found" to the last element. */
        wrap_last_active(s, last, esz);
    } else {
        /* LASTA: step to the element after the last active one. */
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
2798
/* Compute LAST for a Vreg. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
ef23cb72
RH
2812
/* Compute LAST for a Xreg. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
ef23cb72 2826
3a7be554 2827static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578 2828{
1402a6b8
RH
2829 if (!dc_isar_feature(aa64_sve, s)) {
2830 return false;
2831 }
792a5578
RH
2832 if (sve_access_check(s)) {
2833 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2834 }
2835 return true;
2836}
2837
/* CPY (SIMD&FP scalar, merging): copy element 0 of Vn into active
 * elements of Zd.
 */
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
2851
/* REVB: byte-reverse within each element (halfword and larger). */
static gen_helper_gvec_3 * const revb_fns[4] = {
    NULL, gen_helper_sve_revb_h,
    gen_helper_sve_revb_s, gen_helper_sve_revb_d,
};
TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)

/* REVH: halfword-reverse within each element (word and larger). */
static gen_helper_gvec_3 * const revh_fns[4] = {
    NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
};
TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)

/* REVW: word-reverse; only valid for doubleword elements. */
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)

/* SPLICE: concatenate the active segment of Zn with elements of Zm.
 * The SVE2 constructive form uses the implicit pair {Zn, Zn+1}.
 */
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
           gen_helper_sve_splice, a, a->esz)

TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
           a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
75114792 2871
/*
 *** SVE Integer Compare - Vectors Group
 */

/*
 * Predicated vector-vector compare producing a predicate result and
 * updating NZCV via do_pred_flags.  Returns false for an unimplemented
 * size combination (NULL helper).
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    /* The helper returns the packed flag result in t. */
    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2914
/* Same-width vector compares (Zn op Zm) for all four element sizes. */
#define DO_PPZZ(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = {       \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzz_fns[a->esz])

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/* Wide-element compares (Zn op Zm.D); no doubleword form. */
#define DO_PPZW(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = {       \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
        gen_helper_sve_##name##_ppzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzw_fns[a->esz])

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
2952
/*
 *** SVE Integer Compare - Immediate Groups
 */

/*
 * Predicated vector-immediate compare producing a predicate result
 * and updating NZCV.  The immediate travels in the descriptor data
 * field.  Returns false for a NULL (unimplemented) helper.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2992
/* Vector-immediate compares for all four element sizes. */
#define DO_PPZI(NAME, name) \
    static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = {         \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a,                   \
               name##_ppzi_fns[a->esz])

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
3013
/*
 *** SVE Partition Break Group
 */

/*
 * Three-operand break (BRKPA/BRKPB): FN is the plain form, FN_S the
 * flag-setting form selected by a->s.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting variant: fold the result into NZCV. */
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, m, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, m, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    return true;
}
3053
3054static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3055 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3056{
3057 if (!sve_access_check(s)) {
3058 return true;
3059 }
3060
3061 unsigned vsz = pred_full_reg_size(s);
3062
3063 /* Predicate sizes may be smaller and cannot use simd_desc. */
3064 TCGv_ptr d = tcg_temp_new_ptr();
3065 TCGv_ptr n = tcg_temp_new_ptr();
3066 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 3067 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3068
3069 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3070 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3071 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3072
3073 if (a->s) {
93418f1c
RH
3074 TCGv_i32 t = tcg_temp_new_i32();
3075 fn_s(t, d, n, g, desc);
35da316f 3076 do_pred_flags(t);
93418f1c 3077 tcg_temp_free_i32(t);
35da316f 3078 } else {
93418f1c 3079 fn(d, n, g, desc);
35da316f
RH
3080 }
3081 tcg_temp_free_ptr(d);
3082 tcg_temp_free_ptr(n);
3083 tcg_temp_free_ptr(g);
35da316f
RH
3084 return true;
3085}
3086
2224d24d
RH
/* Partition-break expansions; the plain and flag-setting helpers are
 * both passed, with a->s choosing between them inside do_brk3/do_brk2. */
TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpa, gen_helper_sve_brkpas)
TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpb, gen_helper_sve_brkpbs)

TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)

TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)

TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
           gen_helper_sve_brkn, gen_helper_sve_brkns)
35da316f 3104
9ee3a611
RH
3105/*
3106 *** SVE Predicate Count Group
3107 */
3108
/*
 * Deposit into VAL the number of active elements of size ESZ in
 * predicate register PN, gated by governing predicate PG.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        /* The whole predicate fits in a single i64: count bits inline. */
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            /* AND in the governing predicate when it differs from pn. */
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        /* Larger predicates: defer to the out-of-line helper. */
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));

        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}
3147
3a7be554 3148static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611 3149{
1402a6b8
RH
3150 if (!dc_isar_feature(aa64_sve, s)) {
3151 return false;
3152 }
9ee3a611
RH
3153 if (sve_access_check(s)) {
3154 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3155 }
3156 return true;
3157}
3158
3a7be554 3159static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611 3160{
1402a6b8
RH
3161 if (!dc_isar_feature(aa64_sve, s)) {
3162 return false;
3163 }
9ee3a611
RH
3164 if (sve_access_check(s)) {
3165 TCGv_i64 reg = cpu_reg(s, a->rd);
3166 TCGv_i64 val = tcg_temp_new_i64();
3167
3168 do_cntp(s, val, a->esz, a->pg, a->pg);
3169 if (a->d) {
3170 tcg_gen_sub_i64(reg, reg, val);
3171 } else {
3172 tcg_gen_add_i64(reg, reg, val);
3173 }
3174 tcg_temp_free_i64(val);
3175 }
3176 return true;
3177}
3178
3a7be554 3179static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611 3180{
1402a6b8 3181 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
9ee3a611
RH
3182 return false;
3183 }
3184 if (sve_access_check(s)) {
3185 unsigned vsz = vec_full_reg_size(s);
3186 TCGv_i64 val = tcg_temp_new_i64();
3187 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3188
3189 do_cntp(s, val, a->esz, a->pg, a->pg);
3190 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3191 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3192 }
3193 return true;
3194}
3195
3a7be554 3196static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611 3197{
1402a6b8
RH
3198 if (!dc_isar_feature(aa64_sve, s)) {
3199 return false;
3200 }
9ee3a611
RH
3201 if (sve_access_check(s)) {
3202 TCGv_i64 reg = cpu_reg(s, a->rd);
3203 TCGv_i64 val = tcg_temp_new_i64();
3204
3205 do_cntp(s, val, a->esz, a->pg, a->pg);
3206 do_sat_addsub_32(reg, val, a->u, a->d);
3207 }
3208 return true;
3209}
3210
3a7be554 3211static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611 3212{
1402a6b8
RH
3213 if (!dc_isar_feature(aa64_sve, s)) {
3214 return false;
3215 }
9ee3a611
RH
3216 if (sve_access_check(s)) {
3217 TCGv_i64 reg = cpu_reg(s, a->rd);
3218 TCGv_i64 val = tcg_temp_new_i64();
3219
3220 do_cntp(s, val, a->esz, a->pg, a->pg);
3221 do_sat_addsub_64(reg, val, a->u, a->d);
3222 }
3223 return true;
3224}
3225
3a7be554 3226static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611 3227{
1402a6b8 3228 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
9ee3a611
RH
3229 return false;
3230 }
3231 if (sve_access_check(s)) {
3232 TCGv_i64 val = tcg_temp_new_i64();
3233 do_cntp(s, val, a->esz, a->pg, a->pg);
3234 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3235 }
3236 return true;
3237}
3238
caf1cefc
RH
3239/*
3240 *** SVE Integer Compare Scalars Group
3241 */
3242
/*
 * CTERMEQ/CTERMNE: compare and terminate loop.  Computes the compare
 * result into NF and derives VF = !NF & !CF (CF left untouched).
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* a->ne selects NE vs EQ; a->sf selects 64- vs 32-bit operands. */
    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31: negate the 0/1 values
     * so that the flag bit lands in the sign position. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3270
/*
 * WHILE (LT/LE/LO/LS and the SVE2 GT/GE/HI/HS forms): construct a
 * predicate from a scalar loop-bound comparison.  The various
 * conditions are compressed into "number of iterations for which the
 * condition holds", then handed to the whilel/whileg helpers.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (a->lt
        ? !dc_isar_feature(aa64_sve, s)
        : !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit operands: zero- or sign-extend per a->u. */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    /* tmax = number of elements of size esz in the vector. */
    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length. This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
3383
14f6dad1
RH
3384static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3385{
3386 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3387 TCGv_i32 t2;
14f6dad1
RH
3388 TCGv_ptr ptr;
3389 unsigned vsz = vec_full_reg_size(s);
3390 unsigned desc = 0;
3391
3392 if (!dc_isar_feature(aa64_sve2, s)) {
3393 return false;
3394 }
3395 if (!sve_access_check(s)) {
3396 return true;
3397 }
3398
3399 op0 = read_cpu_reg(s, a->rn, 1);
3400 op1 = read_cpu_reg(s, a->rm, 1);
3401
4481bbf2 3402 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3403 diff = tcg_temp_new_i64();
3404
3405 if (a->rw) {
3406 /* WHILERW */
3407 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3408 t1 = tcg_temp_new_i64();
3409 tcg_gen_sub_i64(diff, op0, op1);
3410 tcg_gen_sub_i64(t1, op1, op0);
3411 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3412 tcg_temp_free_i64(t1);
3413 /* Round down to a multiple of ESIZE. */
3414 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3415 /* If op1 == op0, diff == 0, and the condition is always true. */
3416 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3417 } else {
3418 /* WHILEWR */
3419 tcg_gen_sub_i64(diff, op1, op0);
3420 /* Round down to a multiple of ESIZE. */
3421 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3422 /* If op0 >= op1, diff <= 0, the condition is always true. */
3423 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3424 }
3425
3426 /* Bound to the maximum. */
3427 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3428
3429 /* Since we're bounded, pass as a 32-bit type. */
3430 t2 = tcg_temp_new_i32();
3431 tcg_gen_extrl_i64_i32(t2, diff);
3432 tcg_temp_free_i64(diff);
3433
3434 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3435 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3436
3437 ptr = tcg_temp_new_ptr();
3438 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3439
4481bbf2 3440 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3441 do_pred_flags(t2);
3442
3443 tcg_temp_free_ptr(ptr);
3444 tcg_temp_free_i32(t2);
14f6dad1
RH
3445 return true;
3446}
3447
ed491961
RH
3448/*
3449 *** SVE Integer Wide Immediate - Unpredicated Group
3450 */
3451
3a7be554 3452static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961 3453{
1402a6b8 3454 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
ed491961
RH
3455 return false;
3456 }
3457 if (sve_access_check(s)) {
3458 unsigned vsz = vec_full_reg_size(s);
3459 int dofs = vec_full_reg_offset(s, a->rd);
3460 uint64_t imm;
3461
3462 /* Decode the VFP immediate. */
3463 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3464 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3465 }
3466 return true;
3467}
3468
3a7be554 3469static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3470{
1402a6b8
RH
3471 if (!dc_isar_feature(aa64_sve, s)) {
3472 return false;
3473 }
ed491961
RH
3474 if (sve_access_check(s)) {
3475 unsigned vsz = vec_full_reg_size(s);
3476 int dofs = vec_full_reg_offset(s, a->rd);
8711e71f 3477 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3478 }
3479 return true;
3480}
3481
TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)

/* SUB (immediate) is implemented by negating the decoded immediate
 * and reusing the ADD expansion. */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3489
/*
 * SUBR (immediate): reversed subtract, dest = imm - src.  Expanded
 * with GVecGen2s and scalar_first so the immediate is the minuend.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    /* One expansion descriptor per element size (b/h/s/d). */
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}
3532
fa4bd72c 3533TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
6e6a157d 3534
3a7be554 3535static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3536{
6e6a157d 3537 if (sve_access_check(s)) {
138a1f7b
RH
3538 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3539 tcg_constant_i64(a->imm), u, d);
6e6a157d
RH
3540 }
3541 return true;
3542}
3543
17b54d1c
RH
/* Saturating immediate add/sub: (u, d) select unsigned and subtract. */
TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
6e6a157d
RH
3548
3549static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3550{
3551 if (sve_access_check(s)) {
3552 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3553 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3554 vec_full_reg_offset(s, a->rn),
138a1f7b 3555 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
6e6a157d
RH
3556 }
3557 return true;
3558}
3559
/* Min/max with immediate, one helper per element size. */
#define DO_ZZI(NAME, name) \
    static gen_helper_gvec_2i * const name##i_fns[4] = {            \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,     \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,     \
    };                                                              \
    TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3573
5f425b92
RH
/* DOT product (vectors): helper table indexed by [unsigned][size]. */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
d730ecaa 3580
814d4c52
RH
3581/*
3582 * SVE Multiply - Indexed
3583 */
3584
f3500a25
RH
/* Dot products (indexed); SUDOT/USDOT require the I8MM feature. */
TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)
16fcfdc7 3598
/* Multiply (indexed): the element index is passed as descriptor data. */
#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX
3616
/* Long multiply (indexed): data = (index << 1) | TOP, TOP selecting
 * the top or bottom half of each element pair. */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
3637
/* Multiply-add/sub (indexed), 4-operand form; SVE2 only. */
#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
3658
/* Long multiply-add/sub (indexed): data = (index << 1) | TOP. */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
3694
/* Complex multiply-add / complex dot (indexed):
 * data = (index << 2) | rot. */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
3709
ca40a6e6
RH
3710/*
3711 *** SVE Floating Point Multiply-Add Indexed Group
3712 */
3713
0a82d963 3714static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
ca40a6e6 3715{
41bf9b67
RH
3716 static gen_helper_gvec_4_ptr * const fns[4] = {
3717 NULL,
ca40a6e6
RH
3718 gen_helper_gvec_fmla_idx_h,
3719 gen_helper_gvec_fmla_idx_s,
3720 gen_helper_gvec_fmla_idx_d,
3721 };
41bf9b67
RH
3722 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
3723 (a->index << 1) | sub,
3724 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3725}
3726
3b879c28
RH
3727TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
3728TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
0a82d963 3729
ca40a6e6
RH
3730/*
3731 *** SVE Floating Point Multiply Indexed Group
3732 */
3733
9c99ef66
RH
/* FMUL (indexed); no byte-sized form, so fns[0] is NULL. */
static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
    NULL, gen_helper_gvec_fmul_idx_h,
    gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
           fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
ca40a6e6 3741
23fbe79f
RH
3742/*
3743 *** SVE Floating Point Fast Reduction Group
3744 */
3745
3746typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3747 TCGv_ptr, TCGv_i32);
3748
/*
 * Expand an SVE FP fast reduction: reduce ZN under predicate PG to a
 * scalar, written to the FP dreg RD.  fn == NULL means the element
 * size has no helper (invalid encoding).
 */
static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz, p2vsz;
    TCGv_i32 t_desc;
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* The vector size rounded up to a power of two is passed as the
     * descriptor data for the helper's use. */
    p2vsz = pow2ceil(vsz);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
3784
/* FP fast reductions; fns[0] is NULL (no byte element size). */
#define DO_VPZ(NAME, name) \
    static gen_helper_fp_reduce * const name##_fns[4] = {     \
        NULL, gen_helper_sve_##name##_h,                      \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    };                                                        \
    TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)

#undef DO_VPZ
3799
3887c038
RH
3800/*
3801 *** SVE Floating Point Unary Operations - Unpredicated Group
3802 */
3803
de58c6b0
RH
/* FRECPE/FRSQRTE estimates; no byte-sized form (index 0 is NULL). */
static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
    NULL, gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
};
TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)

static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
    NULL, gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
};
TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
3887c038 3815
4d2e2a03
RH
3816/*
3817 *** SVE Floating Point Compare with Zero Group
3818 */
3819
63d6aef8 3820static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4d2e2a03
RH
3821 gen_helper_gvec_3_ptr *fn)
3822{
63d6aef8
RH
3823 if (fn == NULL) {
3824 return false;
3825 }
3826 if (sve_access_check(s)) {
3827 unsigned vsz = vec_full_reg_size(s);
3828 TCGv_ptr status =
3829 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03 3830
63d6aef8
RH
3831 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3832 vec_full_reg_offset(s, a->rn),
3833 pred_full_reg_offset(s, a->pg),
3834 status, vsz, vsz, 0, fn);
3835 tcg_temp_free_ptr(status);
3836 }
3837 return true;
4d2e2a03
RH
3838}
3839
/* FP compare with zero: one helper per element size, bytes invalid. */
#define DO_PPZ(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[] = {         \
        NULL, gen_helper_sve_##name##_h,                          \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3855
67fcd9ad
RH
3856/*
3857 *** SVE floating-point trig multiply-add coefficient
3858 */
3859
cdd85923
RH
/* FTMAD: trig multiply-add coefficient; expanded non-streaming. */
static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
    NULL, gen_helper_sve_ftmad_h,
    gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
};
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
                        ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
                        a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
67fcd9ad 3867
7f9ddf64
RH
3868/*
3869 *** SVE Floating Point Accumulating Reduction Group
3870 */
3871
/*
 * FADDA: strictly-ordered FP accumulating reduction.  esz == 0 (bytes)
 * is invalid; fns[] is therefore indexed by esz - 1.  Flagged with
 * is_nonstreaming before the access check (presumably gated there —
 * see the sve_access_check implementation).
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    /* The initial accumulator is element 0 of Zn. */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
3912
29b80469
RH
3913/*
3914 *** SVE Floating Point Arithmetic - Unpredicated Group
3915 */
3916
/* Unpredicated FP 3-operand ops; no byte form, so fns[0] is NULL. */
#define DO_FP3(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[4] = {       \
        NULL, gen_helper_gvec_##name##_h,                        \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d   \
    };                                                           \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3

/* FTSMUL is expanded with the non-streaming variant of TRANS_FEAT. */
static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
    NULL, gen_helper_gvec_ftsmul_h,
    gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
};
TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
                        ftsmul_fns[a->esz], a, 0)
3938
ec3b87c2
RH
3939/*
3940 *** SVE Floating Point Arithmetic - Predicated Group
3941 */
3942
7de2617b
RH
/* Predicated FP two-operand ops, expanded via gen_gvec_fpst_arg_zpzz. */
#define DO_ZPZZ_FP(NAME, FEAT, name) \
    static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
        NULL, gen_helper_##name##_h, \
        gen_helper_##name##_s, gen_helper_##name##_d \
    }; \
    TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)

DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
8092c6a3 3961
cc48affe
RH
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/*
 * Expand a predicated Zd = Zn <op> scalar operation by calling FN with
 * (zd, zn, pg, scalar, fpstatus, desc).  IS_FP16 selects the FP16
 * variant of the FP status pointer.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    /* The helpers take host pointers into the vector/predicate file. */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
3988
413ee8e4 3989static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
cc48affe
RH
3990 gen_helper_sve_fp2scalar *fn)
3991{
413ee8e4
RH
3992 if (fn == NULL) {
3993 return false;
3994 }
3995 if (sve_access_check(s)) {
3996 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
3997 tcg_constant_i64(imm), fn);
3998 }
3999 return true;
cc48affe
RH
4000}
4001
98c37459
RH
/*
 * Expand one FP immediate insn.  The instruction's 1-bit immediate
 * selects between two FP constants (const0/const1), materialized per
 * element size in name##_const; esz == MO_8 is invalid (NULL helper).
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
    static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d \
    }; \
    static uint64_t const name##_const[4][2] = { \
        { -1, -1 }, \
        { float16_##const0, float16_##const1 }, \
        { float32_##const0, float32_##const1 }, \
        { float64_##const0, float64_##const1 }, \
    }; \
    TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
               name##_const[a->esz][a->imm], name##_fns[a->esz])

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
4027
abfdefd5
RH
4028static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4029 gen_helper_gvec_4_ptr *fn)
4030{
4031 if (fn == NULL) {
4032 return false;
4033 }
4034 if (sve_access_check(s)) {
4035 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4036 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4037 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4038 vec_full_reg_offset(s, a->rn),
4039 vec_full_reg_offset(s, a->rm),
4040 pred_full_reg_offset(s, a->pg),
4041 status, vsz, vsz, 0, fn);
4042 tcg_temp_free_ptr(status);
4043 }
4044 return true;
4045}
4046
/* Expand one predicated FP compare insn via do_fp_cmp. */
#define DO_FPCMP(NAME, name) \
    static gen_helper_gvec_4_ptr * const name##_fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
4063
6f5cd670
RH
static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
    NULL, gen_helper_sve_fcadd_h,
    gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
};
/* FCADD: predicated complex add; a->rot selects the rotation. */
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
           a->rd, a->rn, a->rm, a->pg, a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
76a9d9cd 4071
/* Expand one member of the predicated fused multiply-add family. */
#define DO_FMLA(NAME, name) \
    static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
               a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
               a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4087
498be5b8
RH
static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
    NULL, gen_helper_sve_fcmla_zpzzz_h,
    gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
};
/* FCMLA: predicated complex multiply-add; a->rot passed in the desc data. */
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
           a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
05f48bab 4095
e600d649
RH
/* FCMLA (indexed): only H and S element sizes have indexed forms. */
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
    NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
};
/* Index and rotation are packed together into the desc data field. */
TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
           a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
18fc2405 4102
8092c6a3
RH
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */

/*
 * FP conversions.  The two-letter suffix encodes source and destination
 * element sizes (h = half, s = single, d = double).  The float-to-int
 * conversions with a half-precision source use the FP16 status pointer
 * (FPST_FPCR_F16); all others use FPST_FPCR.
 */
TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)

TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
df4de1af 4154
ed6bb6b4
RH
/* Indexed by element size; shared with the FRINT<mode> expansions below. */
static gen_helper_gvec_3_ptr * const frint_fns[] = {
    NULL,
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};
/* FRINTI rounds using the current FPCR rounding mode (no mode override). */
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
cda3c753 4163
0360730c
RH
4164static gen_helper_gvec_3_ptr * const frintx_fns[] = {
4165 NULL,
4166 gen_helper_sve_frintx_h,
4167 gen_helper_sve_frintx_s,
4168 gen_helper_sve_frintx_d
4169};
4170TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
4171 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753 4172
95365277
SL
/*
 * Expand an FRINT<mode> insn: temporarily force the softfloat rounding
 * mode to MODE, expand FN, then restore the previous rounding mode.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz;
    TCGv_i32 tmode;
    TCGv_ptr status;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* tmode must be a mutable temp: set_rmode writes the old mode into it. */
    tmode = tcg_const_i32(mode);
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    /* Swap tmode with the current rounding mode... */
    gen_helper_set_rmode(tmode, tmode, status);

    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);

    /* ...and swap back, restoring the original mode. */
    gen_helper_set_rmode(tmode, tmode, status);
    tcg_temp_free_i32(tmode);
    tcg_temp_free_ptr(status);
    return true;
}
4203
27645836
RH
/* FRINT<N|P|M|Z|A>: round with an explicitly forced rounding mode. */
TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
           float_round_nearest_even, frint_fns[a->esz])
TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
           float_round_up, frint_fns[a->esz])
TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
           float_round_down, frint_fns[a->esz])
TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
           float_round_to_zero, frint_fns[a->esz])
TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
           float_round_ties_away, frint_fns[a->esz])
cda3c753 4214
0360730c
RH
static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
    NULL, gen_helper_sve_frecpx_h,
    gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
};
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
8092c6a3 4221
0360730c
RH
static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
    NULL, gen_helper_sve_fsqrt_h,
    gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/*
 * S/UCVTF: signed/unsigned int-to-float conversions.  Variants
 * producing a half-precision result use FPST_FPCR_F16.
 */
TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)

TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
8092c6a3 4262
d1822297
RH
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte transfers, plus one if there is a remainder. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small register: fully unrolled sequence of 8-byte loads. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large register: emit a runtime loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) yields the MO_SIZE for a 2/4/8-byte load. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte load followed by a 2-byte load. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
4357
/* Similarly for stores: write LEN bytes at VOFS to address Rn + IMM. */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte transfers, plus one if there is a remainder. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small register: fully unrolled sequence of 8-byte stores. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large register: emit a runtime loop over 8-byte units. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) yields the MO_SIZE for a 2/4/8-byte store. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte store followed by a 2-byte store. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
4442
3a7be554 4443static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297 4444{
1402a6b8
RH
4445 if (!dc_isar_feature(aa64_sve, s)) {
4446 return false;
4447 }
d1822297
RH
4448 if (sve_access_check(s)) {
4449 int size = vec_full_reg_size(s);
4450 int off = vec_full_reg_offset(s, a->rd);
4451 do_ldr(s, off, size, a->rn, a->imm * size);
4452 }
4453 return true;
4454}
4455
3a7be554 4456static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297 4457{
1402a6b8
RH
4458 if (!dc_isar_feature(aa64_sve, s)) {
4459 return false;
4460 }
d1822297
RH
4461 if (sve_access_check(s)) {
4462 int size = pred_full_reg_size(s);
4463 int off = pred_full_reg_offset(s, a->rd);
4464 do_ldr(s, off, size, a->rn, a->imm * size);
4465 }
4466 return true;
4467}
c4e7c493 4468
3a7be554 4469static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204 4470{
1402a6b8
RH
4471 if (!dc_isar_feature(aa64_sve, s)) {
4472 return false;
4473 }
5047c204
RH
4474 if (sve_access_check(s)) {
4475 int size = vec_full_reg_size(s);
4476 int off = vec_full_reg_offset(s, a->rd);
4477 do_str(s, off, size, a->rn, a->imm * size);
4478 }
4479 return true;
4480}
4481
3a7be554 4482static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204 4483{
1402a6b8
RH
4484 if (!dc_isar_feature(aa64_sve, s)) {
4485 return false;
4486 }
5047c204
RH
4487 if (sve_access_check(s)) {
4488 int size = pred_full_reg_size(s);
4489 int off = pred_full_reg_offset(s, a->rd);
4490 do_str(s, off, size, a->rn, a->imm * size);
4491 }
4492 return true;
4493}
4494
c4e7c493
RH
/*
 *** SVE Memory - Contiguous Load Group
 */

/* The memory mode of the dtype.  Indexed by the 4-bit dtype field,
 * which combines memory element size and sign extension. */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

/* Extract just the memory access size from the dtype's MemOp. */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype (log2 of bytes per element). */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4516
/*
 * Expand a predicated contiguous load/store via helper FN.
 * ZT is the first transfer register, PG the governing predicate.
 * MTE_N is the number of registers transferred, used to size the MTE
 * descriptor; IS_WRITE selects the MTE access type.  When MTE is not
 * active, only TBI cleaning of the address is needed.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        int msz = dtype_msz(dtype);

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        /* Total bytes per element across all MTE_N registers, minus 1. */
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_pg);
}
4551
c182c6db
RH
/* Indexed by [mte][be][dtype][nreg].  NULL entries are dtype/nreg
 * combinations with no instruction encoding; see do_ld_zpa. */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
4670
c4e7c493
RH
/* Expand a contiguous LD[1-4] via the ldr_fns dispatch table. */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];

    /*
     * While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
}
4684
3a7be554 4685static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493 4686{
1402a6b8 4687 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
c4e7c493
RH
4688 return false;
4689 }
4690 if (sve_access_check(s)) {
4691 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4692 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4693 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4694 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4695 }
4696 return true;
4697}
4698
3a7be554 4699static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493 4700{
1402a6b8
RH
4701 if (!dc_isar_feature(aa64_sve, s)) {
4702 return false;
4703 }
c4e7c493
RH
4704 if (sve_access_check(s)) {
4705 int vsz = vec_full_reg_size(s);
4706 int elements = vsz >> dtype_esz[a->dtype];
4707 TCGv_i64 addr = new_tmp_a64(s);
4708
4709 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4710 (a->imm * elements * (a->nreg + 1))
4711 << dtype_msz(a->dtype));
4712 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4713 }
4714 return true;
4715}
e2654d75 4716
/*
 * LDFF1 (scalar plus scalar): first-fault contiguous load.
 * Indexed by [mte][be][dtype]; marked non-streaming below.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* LDFF1 has no streaming-SVE form. */
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4818
3a7be554 4819static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 4820{
aa13f7c3
RH
4821 static gen_helper_gvec_mem * const fns[2][2][16] = {
4822 { /* mte inactive, little-endian */
4823 { gen_helper_sve_ldnf1bb_r,
4824 gen_helper_sve_ldnf1bhu_r,
4825 gen_helper_sve_ldnf1bsu_r,
4826 gen_helper_sve_ldnf1bdu_r,
4827
4828 gen_helper_sve_ldnf1sds_le_r,
4829 gen_helper_sve_ldnf1hh_le_r,
4830 gen_helper_sve_ldnf1hsu_le_r,
4831 gen_helper_sve_ldnf1hdu_le_r,
4832
4833 gen_helper_sve_ldnf1hds_le_r,
4834 gen_helper_sve_ldnf1hss_le_r,
4835 gen_helper_sve_ldnf1ss_le_r,
4836 gen_helper_sve_ldnf1sdu_le_r,
4837
4838 gen_helper_sve_ldnf1bds_r,
4839 gen_helper_sve_ldnf1bss_r,
4840 gen_helper_sve_ldnf1bhs_r,
4841 gen_helper_sve_ldnf1dd_le_r },
4842
4843 /* mte inactive, big-endian */
4844 { gen_helper_sve_ldnf1bb_r,
4845 gen_helper_sve_ldnf1bhu_r,
4846 gen_helper_sve_ldnf1bsu_r,
4847 gen_helper_sve_ldnf1bdu_r,
4848
4849 gen_helper_sve_ldnf1sds_be_r,
4850 gen_helper_sve_ldnf1hh_be_r,
4851 gen_helper_sve_ldnf1hsu_be_r,
4852 gen_helper_sve_ldnf1hdu_be_r,
4853
4854 gen_helper_sve_ldnf1hds_be_r,
4855 gen_helper_sve_ldnf1hss_be_r,
4856 gen_helper_sve_ldnf1ss_be_r,
4857 gen_helper_sve_ldnf1sdu_be_r,
4858
4859 gen_helper_sve_ldnf1bds_r,
4860 gen_helper_sve_ldnf1bss_r,
4861 gen_helper_sve_ldnf1bhs_r,
4862 gen_helper_sve_ldnf1dd_be_r } },
4863
4864 { /* mte inactive, little-endian */
4865 { gen_helper_sve_ldnf1bb_r_mte,
4866 gen_helper_sve_ldnf1bhu_r_mte,
4867 gen_helper_sve_ldnf1bsu_r_mte,
4868 gen_helper_sve_ldnf1bdu_r_mte,
4869
4870 gen_helper_sve_ldnf1sds_le_r_mte,
4871 gen_helper_sve_ldnf1hh_le_r_mte,
4872 gen_helper_sve_ldnf1hsu_le_r_mte,
4873 gen_helper_sve_ldnf1hdu_le_r_mte,
4874
4875 gen_helper_sve_ldnf1hds_le_r_mte,
4876 gen_helper_sve_ldnf1hss_le_r_mte,
4877 gen_helper_sve_ldnf1ss_le_r_mte,
4878 gen_helper_sve_ldnf1sdu_le_r_mte,
4879
4880 gen_helper_sve_ldnf1bds_r_mte,
4881 gen_helper_sve_ldnf1bss_r_mte,
4882 gen_helper_sve_ldnf1bhs_r_mte,
4883 gen_helper_sve_ldnf1dd_le_r_mte },
4884
4885 /* mte inactive, big-endian */
4886 { gen_helper_sve_ldnf1bb_r_mte,
4887 gen_helper_sve_ldnf1bhu_r_mte,
4888 gen_helper_sve_ldnf1bsu_r_mte,
4889 gen_helper_sve_ldnf1bdu_r_mte,
4890
4891 gen_helper_sve_ldnf1sds_be_r_mte,
4892 gen_helper_sve_ldnf1hh_be_r_mte,
4893 gen_helper_sve_ldnf1hsu_be_r_mte,
4894 gen_helper_sve_ldnf1hdu_be_r_mte,
4895
4896 gen_helper_sve_ldnf1hds_be_r_mte,
4897 gen_helper_sve_ldnf1hss_be_r_mte,
4898 gen_helper_sve_ldnf1ss_be_r_mte,
4899 gen_helper_sve_ldnf1sdu_be_r_mte,
4900
4901 gen_helper_sve_ldnf1bds_r_mte,
4902 gen_helper_sve_ldnf1bss_r_mte,
4903 gen_helper_sve_ldnf1bhs_r_mte,
4904 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
4905 };
4906
1402a6b8
RH
4907 if (!dc_isar_feature(aa64_sve, s)) {
4908 return false;
4909 }
ccb1cefc 4910 s->is_nonstreaming = true;
e2654d75
RH
4911 if (sve_access_check(s)) {
4912 int vsz = vec_full_reg_size(s);
4913 int elements = vsz >> dtype_esz[a->dtype];
4914 int off = (a->imm * elements) << dtype_msz(a->dtype);
4915 TCGv_i64 addr = new_tmp_a64(s);
4916
4917 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
4918 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4919 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4920 }
4921 return true;
4922}
1a039c7e 4923
c182c6db 4924static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
05abe304 4925{
05abe304
RH
4926 unsigned vsz = vec_full_reg_size(s);
4927 TCGv_ptr t_pg;
7924d239 4928 int poff;
05abe304
RH
4929
4930 /* Load the first quadword using the normal predicated load helpers. */
2a99ab2b
RH
4931 poff = pred_full_reg_offset(s, pg);
4932 if (vsz > 16) {
4933 /*
4934 * Zero-extend the first 16 bits of the predicate into a temporary.
4935 * This avoids triggering an assert making sure we don't have bits
4936 * set within a predicate beyond VQ, but we have lowered VQ to 1
4937 * for this load operation.
4938 */
4939 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 4940#if HOST_BIG_ENDIAN
2a99ab2b
RH
4941 poff += 6;
4942#endif
4943 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4944
4945 poff = offsetof(CPUARMState, vfp.preg_tmp);
4946 tcg_gen_st_i64(tmp, cpu_env, poff);
4947 tcg_temp_free_i64(tmp);
4948 }
4949
05abe304 4950 t_pg = tcg_temp_new_ptr();
2a99ab2b 4951 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 4952
c182c6db
RH
4953 gen_helper_gvec_mem *fn
4954 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
7924d239 4955 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
05abe304
RH
4956
4957 tcg_temp_free_ptr(t_pg);
05abe304
RH
4958
4959 /* Replicate that first quadword. */
4960 if (vsz > 16) {
7924d239
RH
4961 int doff = vec_full_reg_offset(s, zt);
4962 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
05abe304
RH
4963 }
4964}
4965
3a7be554 4966static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304 4967{
1402a6b8 4968 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
05abe304
RH
4969 return false;
4970 }
4971 if (sve_access_check(s)) {
4972 int msz = dtype_msz(a->dtype);
4973 TCGv_i64 addr = new_tmp_a64(s);
4974 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4975 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 4976 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
4977 }
4978 return true;
4979}
4980
3a7be554 4981static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304 4982{
1402a6b8
RH
4983 if (!dc_isar_feature(aa64_sve, s)) {
4984 return false;
4985 }
05abe304
RH
4986 if (sve_access_check(s)) {
4987 TCGv_i64 addr = new_tmp_a64(s);
4988 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 4989 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
4990 }
4991 return true;
4992}
4993
12c563f6
RH
4994static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
4995{
4996 unsigned vsz = vec_full_reg_size(s);
4997 unsigned vsz_r32;
4998 TCGv_ptr t_pg;
4999 int poff, doff;
5000
5001 if (vsz < 32) {
5002 /*
5003 * Note that this UNDEFINED check comes after CheckSVEEnabled()
5004 * in the ARM pseudocode, which is the sve_access_check() done
5005 * in our caller. We should not now return false from the caller.
5006 */
5007 unallocated_encoding(s);
5008 return;
5009 }
5010
5011 /* Load the first octaword using the normal predicated load helpers. */
5012
5013 poff = pred_full_reg_offset(s, pg);
5014 if (vsz > 32) {
5015 /*
5016 * Zero-extend the first 32 bits of the predicate into a temporary.
5017 * This avoids triggering an assert making sure we don't have bits
5018 * set within a predicate beyond VQ, but we have lowered VQ to 2
5019 * for this load operation.
5020 */
5021 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5022#if HOST_BIG_ENDIAN
12c563f6
RH
5023 poff += 4;
5024#endif
5025 tcg_gen_ld32u_i64(tmp, cpu_env, poff);
5026
5027 poff = offsetof(CPUARMState, vfp.preg_tmp);
5028 tcg_gen_st_i64(tmp, cpu_env, poff);
5029 tcg_temp_free_i64(tmp);
5030 }
5031
5032 t_pg = tcg_temp_new_ptr();
5033 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
5034
5035 gen_helper_gvec_mem *fn
5036 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
5037 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
5038
5039 tcg_temp_free_ptr(t_pg);
5040
5041 /*
5042 * Replicate that first octaword.
5043 * The replication happens in units of 32; if the full vector size
5044 * is not a multiple of 32, the final bits are zeroed.
5045 */
5046 doff = vec_full_reg_offset(s, zt);
5047 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
5048 if (vsz >= 64) {
5049 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
5050 }
5051 vsz -= vsz_r32;
5052 if (vsz) {
5053 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
5054 }
5055}
5056
5057static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5058{
5059 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5060 return false;
5061 }
5062 if (a->rm == 31) {
5063 return false;
5064 }
5065 if (sve_access_check(s)) {
5066 TCGv_i64 addr = new_tmp_a64(s);
5067 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5068 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5069 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5070 }
5071 return true;
5072}
5073
5074static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5075{
5076 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5077 return false;
5078 }
5079 if (sve_access_check(s)) {
5080 TCGv_i64 addr = new_tmp_a64(s);
5081 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5082 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5083 }
5084 return true;
5085}
5086
68459864 5087/* Load and broadcast element. */
3a7be554 5088static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 5089{
68459864
RH
5090 unsigned vsz = vec_full_reg_size(s);
5091 unsigned psz = pred_full_reg_size(s);
5092 unsigned esz = dtype_esz[a->dtype];
d0e372b0 5093 unsigned msz = dtype_msz(a->dtype);
c0ed9166 5094 TCGLabel *over;
4ac430e1 5095 TCGv_i64 temp, clean_addr;
68459864 5096
1402a6b8
RH
5097 if (!dc_isar_feature(aa64_sve, s)) {
5098 return false;
5099 }
c0ed9166
RH
5100 if (!sve_access_check(s)) {
5101 return true;
5102 }
5103
5104 over = gen_new_label();
5105
68459864
RH
5106 /* If the guarding predicate has no bits set, no load occurs. */
5107 if (psz <= 8) {
5108 /* Reduce the pred_esz_masks value simply to reduce the
5109 * size of the code generated here.
5110 */
5111 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5112 temp = tcg_temp_new_i64();
5113 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5114 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5115 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5116 tcg_temp_free_i64(temp);
5117 } else {
5118 TCGv_i32 t32 = tcg_temp_new_i32();
5119 find_last_active(s, t32, esz, a->pg);
5120 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5121 tcg_temp_free_i32(t32);
5122 }
5123
5124 /* Load the data. */
5125 temp = tcg_temp_new_i64();
d0e372b0 5126 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5127 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5128
5129 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
0ca0f872 5130 finalize_memop(s, dtype_mop[a->dtype]));
68459864
RH
5131
5132 /* Broadcast to *all* elements. */
5133 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5134 vsz, vsz, temp);
5135 tcg_temp_free_i64(temp);
5136
5137 /* Zero the inactive elements. */
5138 gen_set_label(over);
60245996 5139 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5140}
5141
1a039c7e
RH
5142static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5143 int msz, int esz, int nreg)
5144{
71b9f394
RH
5145 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5146 { { { gen_helper_sve_st1bb_r,
5147 gen_helper_sve_st1bh_r,
5148 gen_helper_sve_st1bs_r,
5149 gen_helper_sve_st1bd_r },
5150 { NULL,
5151 gen_helper_sve_st1hh_le_r,
5152 gen_helper_sve_st1hs_le_r,
5153 gen_helper_sve_st1hd_le_r },
5154 { NULL, NULL,
5155 gen_helper_sve_st1ss_le_r,
5156 gen_helper_sve_st1sd_le_r },
5157 { NULL, NULL, NULL,
5158 gen_helper_sve_st1dd_le_r } },
5159 { { gen_helper_sve_st1bb_r,
5160 gen_helper_sve_st1bh_r,
5161 gen_helper_sve_st1bs_r,
5162 gen_helper_sve_st1bd_r },
5163 { NULL,
5164 gen_helper_sve_st1hh_be_r,
5165 gen_helper_sve_st1hs_be_r,
5166 gen_helper_sve_st1hd_be_r },
5167 { NULL, NULL,
5168 gen_helper_sve_st1ss_be_r,
5169 gen_helper_sve_st1sd_be_r },
5170 { NULL, NULL, NULL,
5171 gen_helper_sve_st1dd_be_r } } },
5172
5173 { { { gen_helper_sve_st1bb_r_mte,
5174 gen_helper_sve_st1bh_r_mte,
5175 gen_helper_sve_st1bs_r_mte,
5176 gen_helper_sve_st1bd_r_mte },
5177 { NULL,
5178 gen_helper_sve_st1hh_le_r_mte,
5179 gen_helper_sve_st1hs_le_r_mte,
5180 gen_helper_sve_st1hd_le_r_mte },
5181 { NULL, NULL,
5182 gen_helper_sve_st1ss_le_r_mte,
5183 gen_helper_sve_st1sd_le_r_mte },
5184 { NULL, NULL, NULL,
5185 gen_helper_sve_st1dd_le_r_mte } },
5186 { { gen_helper_sve_st1bb_r_mte,
5187 gen_helper_sve_st1bh_r_mte,
5188 gen_helper_sve_st1bs_r_mte,
5189 gen_helper_sve_st1bd_r_mte },
5190 { NULL,
5191 gen_helper_sve_st1hh_be_r_mte,
5192 gen_helper_sve_st1hs_be_r_mte,
5193 gen_helper_sve_st1hd_be_r_mte },
5194 { NULL, NULL,
5195 gen_helper_sve_st1ss_be_r_mte,
5196 gen_helper_sve_st1sd_be_r_mte },
5197 { NULL, NULL, NULL,
5198 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5199 };
71b9f394
RH
5200 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5201 { { { gen_helper_sve_st2bb_r,
5202 gen_helper_sve_st2hh_le_r,
5203 gen_helper_sve_st2ss_le_r,
5204 gen_helper_sve_st2dd_le_r },
5205 { gen_helper_sve_st3bb_r,
5206 gen_helper_sve_st3hh_le_r,
5207 gen_helper_sve_st3ss_le_r,
5208 gen_helper_sve_st3dd_le_r },
5209 { gen_helper_sve_st4bb_r,
5210 gen_helper_sve_st4hh_le_r,
5211 gen_helper_sve_st4ss_le_r,
5212 gen_helper_sve_st4dd_le_r } },
5213 { { gen_helper_sve_st2bb_r,
5214 gen_helper_sve_st2hh_be_r,
5215 gen_helper_sve_st2ss_be_r,
5216 gen_helper_sve_st2dd_be_r },
5217 { gen_helper_sve_st3bb_r,
5218 gen_helper_sve_st3hh_be_r,
5219 gen_helper_sve_st3ss_be_r,
5220 gen_helper_sve_st3dd_be_r },
5221 { gen_helper_sve_st4bb_r,
5222 gen_helper_sve_st4hh_be_r,
5223 gen_helper_sve_st4ss_be_r,
5224 gen_helper_sve_st4dd_be_r } } },
5225 { { { gen_helper_sve_st2bb_r_mte,
5226 gen_helper_sve_st2hh_le_r_mte,
5227 gen_helper_sve_st2ss_le_r_mte,
5228 gen_helper_sve_st2dd_le_r_mte },
5229 { gen_helper_sve_st3bb_r_mte,
5230 gen_helper_sve_st3hh_le_r_mte,
5231 gen_helper_sve_st3ss_le_r_mte,
5232 gen_helper_sve_st3dd_le_r_mte },
5233 { gen_helper_sve_st4bb_r_mte,
5234 gen_helper_sve_st4hh_le_r_mte,
5235 gen_helper_sve_st4ss_le_r_mte,
5236 gen_helper_sve_st4dd_le_r_mte } },
5237 { { gen_helper_sve_st2bb_r_mte,
5238 gen_helper_sve_st2hh_be_r_mte,
5239 gen_helper_sve_st2ss_be_r_mte,
5240 gen_helper_sve_st2dd_be_r_mte },
5241 { gen_helper_sve_st3bb_r_mte,
5242 gen_helper_sve_st3hh_be_r_mte,
5243 gen_helper_sve_st3ss_be_r_mte,
5244 gen_helper_sve_st3dd_be_r_mte },
5245 { gen_helper_sve_st4bb_r_mte,
5246 gen_helper_sve_st4hh_be_r_mte,
5247 gen_helper_sve_st4ss_be_r_mte,
5248 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5249 };
5250 gen_helper_gvec_mem *fn;
28d57f2d 5251 int be = s->be_data == MO_BE;
1a039c7e
RH
5252
5253 if (nreg == 0) {
5254 /* ST1 */
71b9f394
RH
5255 fn = fn_single[s->mte_active[0]][be][msz][esz];
5256 nreg = 1;
1a039c7e
RH
5257 } else {
5258 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5259 assert(msz == esz);
71b9f394 5260 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5261 }
5262 assert(fn != NULL);
71b9f394 5263 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5264}
5265
3a7be554 5266static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e 5267{
1402a6b8
RH
5268 if (!dc_isar_feature(aa64_sve, s)) {
5269 return false;
5270 }
1a039c7e
RH
5271 if (a->rm == 31 || a->msz > a->esz) {
5272 return false;
5273 }
5274 if (sve_access_check(s)) {
5275 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5276 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5277 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5278 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5279 }
5280 return true;
5281}
5282
3a7be554 5283static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e 5284{
1402a6b8
RH
5285 if (!dc_isar_feature(aa64_sve, s)) {
5286 return false;
5287 }
1a039c7e
RH
5288 if (a->msz > a->esz) {
5289 return false;
5290 }
5291 if (sve_access_check(s)) {
5292 int vsz = vec_full_reg_size(s);
5293 int elements = vsz >> a->esz;
5294 TCGv_i64 addr = new_tmp_a64(s);
5295
5296 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5297 (a->imm * elements * (a->nreg + 1)) << a->msz);
5298 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5299 }
5300 return true;
5301}
f6dbf62a
RH
5302
5303/*
5304 *** SVE gather loads / scatter stores
5305 */
5306
500d0484 5307static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5308 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5309 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5310{
5311 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5312 TCGv_ptr t_zm = tcg_temp_new_ptr();
5313 TCGv_ptr t_pg = tcg_temp_new_ptr();
5314 TCGv_ptr t_zt = tcg_temp_new_ptr();
d28d12f0 5315 int desc = 0;
500d0484 5316
d28d12f0
RH
5317 if (s->mte_active[0]) {
5318 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5319 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5320 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5321 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5322 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
5323 desc <<= SVE_MTEDESC_SHIFT;
5324 }
cdecb3fc 5325 desc = simd_desc(vsz, vsz, desc | scale);
f6dbf62a
RH
5326
5327 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5328 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5329 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
c6a59b55 5330 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
f6dbf62a
RH
5331
5332 tcg_temp_free_ptr(t_zt);
5333 tcg_temp_free_ptr(t_zm);
5334 tcg_temp_free_ptr(t_pg);
f6dbf62a
RH
5335}
5336
d28d12f0
RH
5337/* Indexed by [mte][be][ff][xs][u][msz]. */
5338static gen_helper_gvec_mem_scatter * const
5339gather_load_fn32[2][2][2][2][2][3] = {
5340 { /* MTE Inactive */
5341 { /* Little-endian */
5342 { { { gen_helper_sve_ldbss_zsu,
5343 gen_helper_sve_ldhss_le_zsu,
5344 NULL, },
5345 { gen_helper_sve_ldbsu_zsu,
5346 gen_helper_sve_ldhsu_le_zsu,
5347 gen_helper_sve_ldss_le_zsu, } },
5348 { { gen_helper_sve_ldbss_zss,
5349 gen_helper_sve_ldhss_le_zss,
5350 NULL, },
5351 { gen_helper_sve_ldbsu_zss,
5352 gen_helper_sve_ldhsu_le_zss,
5353 gen_helper_sve_ldss_le_zss, } } },
5354
5355 /* First-fault */
5356 { { { gen_helper_sve_ldffbss_zsu,
5357 gen_helper_sve_ldffhss_le_zsu,
5358 NULL, },
5359 { gen_helper_sve_ldffbsu_zsu,
5360 gen_helper_sve_ldffhsu_le_zsu,
5361 gen_helper_sve_ldffss_le_zsu, } },
5362 { { gen_helper_sve_ldffbss_zss,
5363 gen_helper_sve_ldffhss_le_zss,
5364 NULL, },
5365 { gen_helper_sve_ldffbsu_zss,
5366 gen_helper_sve_ldffhsu_le_zss,
5367 gen_helper_sve_ldffss_le_zss, } } } },
5368
5369 { /* Big-endian */
5370 { { { gen_helper_sve_ldbss_zsu,
5371 gen_helper_sve_ldhss_be_zsu,
5372 NULL, },
5373 { gen_helper_sve_ldbsu_zsu,
5374 gen_helper_sve_ldhsu_be_zsu,
5375 gen_helper_sve_ldss_be_zsu, } },
5376 { { gen_helper_sve_ldbss_zss,
5377 gen_helper_sve_ldhss_be_zss,
5378 NULL, },
5379 { gen_helper_sve_ldbsu_zss,
5380 gen_helper_sve_ldhsu_be_zss,
5381 gen_helper_sve_ldss_be_zss, } } },
5382
5383 /* First-fault */
5384 { { { gen_helper_sve_ldffbss_zsu,
5385 gen_helper_sve_ldffhss_be_zsu,
5386 NULL, },
5387 { gen_helper_sve_ldffbsu_zsu,
5388 gen_helper_sve_ldffhsu_be_zsu,
5389 gen_helper_sve_ldffss_be_zsu, } },
5390 { { gen_helper_sve_ldffbss_zss,
5391 gen_helper_sve_ldffhss_be_zss,
5392 NULL, },
5393 { gen_helper_sve_ldffbsu_zss,
5394 gen_helper_sve_ldffhsu_be_zss,
5395 gen_helper_sve_ldffss_be_zss, } } } } },
5396 { /* MTE Active */
5397 { /* Little-endian */
5398 { { { gen_helper_sve_ldbss_zsu_mte,
5399 gen_helper_sve_ldhss_le_zsu_mte,
5400 NULL, },
5401 { gen_helper_sve_ldbsu_zsu_mte,
5402 gen_helper_sve_ldhsu_le_zsu_mte,
5403 gen_helper_sve_ldss_le_zsu_mte, } },
5404 { { gen_helper_sve_ldbss_zss_mte,
5405 gen_helper_sve_ldhss_le_zss_mte,
5406 NULL, },
5407 { gen_helper_sve_ldbsu_zss_mte,
5408 gen_helper_sve_ldhsu_le_zss_mte,
5409 gen_helper_sve_ldss_le_zss_mte, } } },
5410
5411 /* First-fault */
5412 { { { gen_helper_sve_ldffbss_zsu_mte,
5413 gen_helper_sve_ldffhss_le_zsu_mte,
5414 NULL, },
5415 { gen_helper_sve_ldffbsu_zsu_mte,
5416 gen_helper_sve_ldffhsu_le_zsu_mte,
5417 gen_helper_sve_ldffss_le_zsu_mte, } },
5418 { { gen_helper_sve_ldffbss_zss_mte,
5419 gen_helper_sve_ldffhss_le_zss_mte,
5420 NULL, },
5421 { gen_helper_sve_ldffbsu_zss_mte,
5422 gen_helper_sve_ldffhsu_le_zss_mte,
5423 gen_helper_sve_ldffss_le_zss_mte, } } } },
5424
5425 { /* Big-endian */
5426 { { { gen_helper_sve_ldbss_zsu_mte,
5427 gen_helper_sve_ldhss_be_zsu_mte,
5428 NULL, },
5429 { gen_helper_sve_ldbsu_zsu_mte,
5430 gen_helper_sve_ldhsu_be_zsu_mte,
5431 gen_helper_sve_ldss_be_zsu_mte, } },
5432 { { gen_helper_sve_ldbss_zss_mte,
5433 gen_helper_sve_ldhss_be_zss_mte,
5434 NULL, },
5435 { gen_helper_sve_ldbsu_zss_mte,
5436 gen_helper_sve_ldhsu_be_zss_mte,
5437 gen_helper_sve_ldss_be_zss_mte, } } },
5438
5439 /* First-fault */
5440 { { { gen_helper_sve_ldffbss_zsu_mte,
5441 gen_helper_sve_ldffhss_be_zsu_mte,
5442 NULL, },
5443 { gen_helper_sve_ldffbsu_zsu_mte,
5444 gen_helper_sve_ldffhsu_be_zsu_mte,
5445 gen_helper_sve_ldffss_be_zsu_mte, } },
5446 { { gen_helper_sve_ldffbss_zss_mte,
5447 gen_helper_sve_ldffhss_be_zss_mte,
5448 NULL, },
5449 { gen_helper_sve_ldffbsu_zss_mte,
5450 gen_helper_sve_ldffhsu_be_zss_mte,
5451 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5452};
5453
5454/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5455static gen_helper_gvec_mem_scatter * const
5456gather_load_fn64[2][2][2][3][2][4] = {
5457 { /* MTE Inactive */
5458 { /* Little-endian */
5459 { { { gen_helper_sve_ldbds_zsu,
5460 gen_helper_sve_ldhds_le_zsu,
5461 gen_helper_sve_ldsds_le_zsu,
5462 NULL, },
5463 { gen_helper_sve_ldbdu_zsu,
5464 gen_helper_sve_ldhdu_le_zsu,
5465 gen_helper_sve_ldsdu_le_zsu,
5466 gen_helper_sve_lddd_le_zsu, } },
5467 { { gen_helper_sve_ldbds_zss,
5468 gen_helper_sve_ldhds_le_zss,
5469 gen_helper_sve_ldsds_le_zss,
5470 NULL, },
5471 { gen_helper_sve_ldbdu_zss,
5472 gen_helper_sve_ldhdu_le_zss,
5473 gen_helper_sve_ldsdu_le_zss,
5474 gen_helper_sve_lddd_le_zss, } },
5475 { { gen_helper_sve_ldbds_zd,
5476 gen_helper_sve_ldhds_le_zd,
5477 gen_helper_sve_ldsds_le_zd,
5478 NULL, },
5479 { gen_helper_sve_ldbdu_zd,
5480 gen_helper_sve_ldhdu_le_zd,
5481 gen_helper_sve_ldsdu_le_zd,
5482 gen_helper_sve_lddd_le_zd, } } },
5483
5484 /* First-fault */
5485 { { { gen_helper_sve_ldffbds_zsu,
5486 gen_helper_sve_ldffhds_le_zsu,
5487 gen_helper_sve_ldffsds_le_zsu,
5488 NULL, },
5489 { gen_helper_sve_ldffbdu_zsu,
5490 gen_helper_sve_ldffhdu_le_zsu,
5491 gen_helper_sve_ldffsdu_le_zsu,
5492 gen_helper_sve_ldffdd_le_zsu, } },
5493 { { gen_helper_sve_ldffbds_zss,
5494 gen_helper_sve_ldffhds_le_zss,
5495 gen_helper_sve_ldffsds_le_zss,
5496 NULL, },
5497 { gen_helper_sve_ldffbdu_zss,
5498 gen_helper_sve_ldffhdu_le_zss,
5499 gen_helper_sve_ldffsdu_le_zss,
5500 gen_helper_sve_ldffdd_le_zss, } },
5501 { { gen_helper_sve_ldffbds_zd,
5502 gen_helper_sve_ldffhds_le_zd,
5503 gen_helper_sve_ldffsds_le_zd,
5504 NULL, },
5505 { gen_helper_sve_ldffbdu_zd,
5506 gen_helper_sve_ldffhdu_le_zd,
5507 gen_helper_sve_ldffsdu_le_zd,
5508 gen_helper_sve_ldffdd_le_zd, } } } },
5509 { /* Big-endian */
5510 { { { gen_helper_sve_ldbds_zsu,
5511 gen_helper_sve_ldhds_be_zsu,
5512 gen_helper_sve_ldsds_be_zsu,
5513 NULL, },
5514 { gen_helper_sve_ldbdu_zsu,
5515 gen_helper_sve_ldhdu_be_zsu,
5516 gen_helper_sve_ldsdu_be_zsu,
5517 gen_helper_sve_lddd_be_zsu, } },
5518 { { gen_helper_sve_ldbds_zss,
5519 gen_helper_sve_ldhds_be_zss,
5520 gen_helper_sve_ldsds_be_zss,
5521 NULL, },
5522 { gen_helper_sve_ldbdu_zss,
5523 gen_helper_sve_ldhdu_be_zss,
5524 gen_helper_sve_ldsdu_be_zss,
5525 gen_helper_sve_lddd_be_zss, } },
5526 { { gen_helper_sve_ldbds_zd,
5527 gen_helper_sve_ldhds_be_zd,
5528 gen_helper_sve_ldsds_be_zd,
5529 NULL, },
5530 { gen_helper_sve_ldbdu_zd,
5531 gen_helper_sve_ldhdu_be_zd,
5532 gen_helper_sve_ldsdu_be_zd,
5533 gen_helper_sve_lddd_be_zd, } } },
5534
5535 /* First-fault */
5536 { { { gen_helper_sve_ldffbds_zsu,
5537 gen_helper_sve_ldffhds_be_zsu,
5538 gen_helper_sve_ldffsds_be_zsu,
5539 NULL, },
5540 { gen_helper_sve_ldffbdu_zsu,
5541 gen_helper_sve_ldffhdu_be_zsu,
5542 gen_helper_sve_ldffsdu_be_zsu,
5543 gen_helper_sve_ldffdd_be_zsu, } },
5544 { { gen_helper_sve_ldffbds_zss,
5545 gen_helper_sve_ldffhds_be_zss,
5546 gen_helper_sve_ldffsds_be_zss,
5547 NULL, },
5548 { gen_helper_sve_ldffbdu_zss,
5549 gen_helper_sve_ldffhdu_be_zss,
5550 gen_helper_sve_ldffsdu_be_zss,
5551 gen_helper_sve_ldffdd_be_zss, } },
5552 { { gen_helper_sve_ldffbds_zd,
5553 gen_helper_sve_ldffhds_be_zd,
5554 gen_helper_sve_ldffsds_be_zd,
5555 NULL, },
5556 { gen_helper_sve_ldffbdu_zd,
5557 gen_helper_sve_ldffhdu_be_zd,
5558 gen_helper_sve_ldffsdu_be_zd,
5559 gen_helper_sve_ldffdd_be_zd, } } } } },
5560 { /* MTE Active */
5561 { /* Little-endian */
5562 { { { gen_helper_sve_ldbds_zsu_mte,
5563 gen_helper_sve_ldhds_le_zsu_mte,
5564 gen_helper_sve_ldsds_le_zsu_mte,
5565 NULL, },
5566 { gen_helper_sve_ldbdu_zsu_mte,
5567 gen_helper_sve_ldhdu_le_zsu_mte,
5568 gen_helper_sve_ldsdu_le_zsu_mte,
5569 gen_helper_sve_lddd_le_zsu_mte, } },
5570 { { gen_helper_sve_ldbds_zss_mte,
5571 gen_helper_sve_ldhds_le_zss_mte,
5572 gen_helper_sve_ldsds_le_zss_mte,
5573 NULL, },
5574 { gen_helper_sve_ldbdu_zss_mte,
5575 gen_helper_sve_ldhdu_le_zss_mte,
5576 gen_helper_sve_ldsdu_le_zss_mte,
5577 gen_helper_sve_lddd_le_zss_mte, } },
5578 { { gen_helper_sve_ldbds_zd_mte,
5579 gen_helper_sve_ldhds_le_zd_mte,
5580 gen_helper_sve_ldsds_le_zd_mte,
5581 NULL, },
5582 { gen_helper_sve_ldbdu_zd_mte,
5583 gen_helper_sve_ldhdu_le_zd_mte,
5584 gen_helper_sve_ldsdu_le_zd_mte,
5585 gen_helper_sve_lddd_le_zd_mte, } } },
5586
5587 /* First-fault */
5588 { { { gen_helper_sve_ldffbds_zsu_mte,
5589 gen_helper_sve_ldffhds_le_zsu_mte,
5590 gen_helper_sve_ldffsds_le_zsu_mte,
5591 NULL, },
5592 { gen_helper_sve_ldffbdu_zsu_mte,
5593 gen_helper_sve_ldffhdu_le_zsu_mte,
5594 gen_helper_sve_ldffsdu_le_zsu_mte,
5595 gen_helper_sve_ldffdd_le_zsu_mte, } },
5596 { { gen_helper_sve_ldffbds_zss_mte,
5597 gen_helper_sve_ldffhds_le_zss_mte,
5598 gen_helper_sve_ldffsds_le_zss_mte,
5599 NULL, },
5600 { gen_helper_sve_ldffbdu_zss_mte,
5601 gen_helper_sve_ldffhdu_le_zss_mte,
5602 gen_helper_sve_ldffsdu_le_zss_mte,
5603 gen_helper_sve_ldffdd_le_zss_mte, } },
5604 { { gen_helper_sve_ldffbds_zd_mte,
5605 gen_helper_sve_ldffhds_le_zd_mte,
5606 gen_helper_sve_ldffsds_le_zd_mte,
5607 NULL, },
5608 { gen_helper_sve_ldffbdu_zd_mte,
5609 gen_helper_sve_ldffhdu_le_zd_mte,
5610 gen_helper_sve_ldffsdu_le_zd_mte,
5611 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5612 { /* Big-endian */
5613 { { { gen_helper_sve_ldbds_zsu_mte,
5614 gen_helper_sve_ldhds_be_zsu_mte,
5615 gen_helper_sve_ldsds_be_zsu_mte,
5616 NULL, },
5617 { gen_helper_sve_ldbdu_zsu_mte,
5618 gen_helper_sve_ldhdu_be_zsu_mte,
5619 gen_helper_sve_ldsdu_be_zsu_mte,
5620 gen_helper_sve_lddd_be_zsu_mte, } },
5621 { { gen_helper_sve_ldbds_zss_mte,
5622 gen_helper_sve_ldhds_be_zss_mte,
5623 gen_helper_sve_ldsds_be_zss_mte,
5624 NULL, },
5625 { gen_helper_sve_ldbdu_zss_mte,
5626 gen_helper_sve_ldhdu_be_zss_mte,
5627 gen_helper_sve_ldsdu_be_zss_mte,
5628 gen_helper_sve_lddd_be_zss_mte, } },
5629 { { gen_helper_sve_ldbds_zd_mte,
5630 gen_helper_sve_ldhds_be_zd_mte,
5631 gen_helper_sve_ldsds_be_zd_mte,
5632 NULL, },
5633 { gen_helper_sve_ldbdu_zd_mte,
5634 gen_helper_sve_ldhdu_be_zd_mte,
5635 gen_helper_sve_ldsdu_be_zd_mte,
5636 gen_helper_sve_lddd_be_zd_mte, } } },
5637
5638 /* First-fault */
5639 { { { gen_helper_sve_ldffbds_zsu_mte,
5640 gen_helper_sve_ldffhds_be_zsu_mte,
5641 gen_helper_sve_ldffsds_be_zsu_mte,
5642 NULL, },
5643 { gen_helper_sve_ldffbdu_zsu_mte,
5644 gen_helper_sve_ldffhdu_be_zsu_mte,
5645 gen_helper_sve_ldffsdu_be_zsu_mte,
5646 gen_helper_sve_ldffdd_be_zsu_mte, } },
5647 { { gen_helper_sve_ldffbds_zss_mte,
5648 gen_helper_sve_ldffhds_be_zss_mte,
5649 gen_helper_sve_ldffsds_be_zss_mte,
5650 NULL, },
5651 { gen_helper_sve_ldffbdu_zss_mte,
5652 gen_helper_sve_ldffhdu_be_zss_mte,
5653 gen_helper_sve_ldffsdu_be_zss_mte,
5654 gen_helper_sve_ldffdd_be_zss_mte, } },
5655 { { gen_helper_sve_ldffbds_zd_mte,
5656 gen_helper_sve_ldffhds_be_zd_mte,
5657 gen_helper_sve_ldffsds_be_zd_mte,
5658 NULL, },
5659 { gen_helper_sve_ldffbdu_zd_mte,
5660 gen_helper_sve_ldffhdu_be_zd_mte,
5661 gen_helper_sve_ldffsdu_be_zd_mte,
5662 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
5663};
5664
3a7be554 5665static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5666{
5667 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5668 bool be = s->be_data == MO_BE;
5669 bool mte = s->mte_active[0];
673e9fa6 5670
1402a6b8
RH
5671 if (!dc_isar_feature(aa64_sve, s)) {
5672 return false;
5673 }
765ff97d 5674 s->is_nonstreaming = true;
673e9fa6
RH
5675 if (!sve_access_check(s)) {
5676 return true;
5677 }
5678
5679 switch (a->esz) {
5680 case MO_32:
d28d12f0 5681 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5682 break;
5683 case MO_64:
d28d12f0 5684 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5685 break;
5686 }
5687 assert(fn != NULL);
5688
5689 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5690 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5691 return true;
5692}
5693
3a7be554 5694static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5695{
5696 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5697 bool be = s->be_data == MO_BE;
5698 bool mte = s->mte_active[0];
673e9fa6
RH
5699
5700 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5701 return false;
5702 }
1402a6b8
RH
5703 if (!dc_isar_feature(aa64_sve, s)) {
5704 return false;
5705 }
765ff97d 5706 s->is_nonstreaming = true;
673e9fa6
RH
5707 if (!sve_access_check(s)) {
5708 return true;
5709 }
5710
5711 switch (a->esz) {
5712 case MO_32:
d28d12f0 5713 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5714 break;
5715 case MO_64:
d28d12f0 5716 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5717 break;
5718 }
5719 assert(fn != NULL);
5720
5721 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5722 * by loading the immediate into the scalar parameter.
5723 */
2ccdf94f
RH
5724 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5725 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
5726 return true;
5727}
5728
cf327449
SL
5729static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
5730{
b17ab470
RH
5731 gen_helper_gvec_mem_scatter *fn = NULL;
5732 bool be = s->be_data == MO_BE;
5733 bool mte = s->mte_active[0];
5734
5735 if (a->esz < a->msz + !a->u) {
5736 return false;
5737 }
cf327449
SL
5738 if (!dc_isar_feature(aa64_sve2, s)) {
5739 return false;
5740 }
765ff97d 5741 s->is_nonstreaming = true;
b17ab470
RH
5742 if (!sve_access_check(s)) {
5743 return true;
5744 }
5745
5746 switch (a->esz) {
5747 case MO_32:
5748 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
5749 break;
5750 case MO_64:
5751 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
5752 break;
5753 }
5754 assert(fn != NULL);
5755
5756 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5757 cpu_reg(s, a->rm), a->msz, false, fn);
5758 return true;
cf327449
SL
5759}
5760
d28d12f0
RH
5761/* Indexed by [mte][be][xs][msz]. */
5762static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5763 { /* MTE Inactive */
5764 { /* Little-endian */
5765 { gen_helper_sve_stbs_zsu,
5766 gen_helper_sve_sths_le_zsu,
5767 gen_helper_sve_stss_le_zsu, },
5768 { gen_helper_sve_stbs_zss,
5769 gen_helper_sve_sths_le_zss,
5770 gen_helper_sve_stss_le_zss, } },
5771 { /* Big-endian */
5772 { gen_helper_sve_stbs_zsu,
5773 gen_helper_sve_sths_be_zsu,
5774 gen_helper_sve_stss_be_zsu, },
5775 { gen_helper_sve_stbs_zss,
5776 gen_helper_sve_sths_be_zss,
5777 gen_helper_sve_stss_be_zss, } } },
5778 { /* MTE Active */
5779 { /* Little-endian */
5780 { gen_helper_sve_stbs_zsu_mte,
5781 gen_helper_sve_sths_le_zsu_mte,
5782 gen_helper_sve_stss_le_zsu_mte, },
5783 { gen_helper_sve_stbs_zss_mte,
5784 gen_helper_sve_sths_le_zss_mte,
5785 gen_helper_sve_stss_le_zss_mte, } },
5786 { /* Big-endian */
5787 { gen_helper_sve_stbs_zsu_mte,
5788 gen_helper_sve_sths_be_zsu_mte,
5789 gen_helper_sve_stss_be_zsu_mte, },
5790 { gen_helper_sve_stbs_zss_mte,
5791 gen_helper_sve_sths_be_zss_mte,
5792 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
5793};
5794
5795/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5796static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5797 { /* MTE Inactive */
5798 { /* Little-endian */
5799 { gen_helper_sve_stbd_zsu,
5800 gen_helper_sve_sthd_le_zsu,
5801 gen_helper_sve_stsd_le_zsu,
5802 gen_helper_sve_stdd_le_zsu, },
5803 { gen_helper_sve_stbd_zss,
5804 gen_helper_sve_sthd_le_zss,
5805 gen_helper_sve_stsd_le_zss,
5806 gen_helper_sve_stdd_le_zss, },
5807 { gen_helper_sve_stbd_zd,
5808 gen_helper_sve_sthd_le_zd,
5809 gen_helper_sve_stsd_le_zd,
5810 gen_helper_sve_stdd_le_zd, } },
5811 { /* Big-endian */
5812 { gen_helper_sve_stbd_zsu,
5813 gen_helper_sve_sthd_be_zsu,
5814 gen_helper_sve_stsd_be_zsu,
5815 gen_helper_sve_stdd_be_zsu, },
5816 { gen_helper_sve_stbd_zss,
5817 gen_helper_sve_sthd_be_zss,
5818 gen_helper_sve_stsd_be_zss,
5819 gen_helper_sve_stdd_be_zss, },
5820 { gen_helper_sve_stbd_zd,
5821 gen_helper_sve_sthd_be_zd,
5822 gen_helper_sve_stsd_be_zd,
5823 gen_helper_sve_stdd_be_zd, } } },
5824 { /* MTE Inactive */
5825 { /* Little-endian */
5826 { gen_helper_sve_stbd_zsu_mte,
5827 gen_helper_sve_sthd_le_zsu_mte,
5828 gen_helper_sve_stsd_le_zsu_mte,
5829 gen_helper_sve_stdd_le_zsu_mte, },
5830 { gen_helper_sve_stbd_zss_mte,
5831 gen_helper_sve_sthd_le_zss_mte,
5832 gen_helper_sve_stsd_le_zss_mte,
5833 gen_helper_sve_stdd_le_zss_mte, },
5834 { gen_helper_sve_stbd_zd_mte,
5835 gen_helper_sve_sthd_le_zd_mte,
5836 gen_helper_sve_stsd_le_zd_mte,
5837 gen_helper_sve_stdd_le_zd_mte, } },
5838 { /* Big-endian */
5839 { gen_helper_sve_stbd_zsu_mte,
5840 gen_helper_sve_sthd_be_zsu_mte,
5841 gen_helper_sve_stsd_be_zsu_mte,
5842 gen_helper_sve_stdd_be_zsu_mte, },
5843 { gen_helper_sve_stbd_zss_mte,
5844 gen_helper_sve_sthd_be_zss_mte,
5845 gen_helper_sve_stsd_be_zss_mte,
5846 gen_helper_sve_stdd_be_zss_mte, },
5847 { gen_helper_sve_stbd_zd_mte,
5848 gen_helper_sve_sthd_be_zd_mte,
5849 gen_helper_sve_stsd_be_zd_mte,
5850 gen_helper_sve_stdd_be_zd_mte, } } },
408ecde9
RH
5851};
5852
3a7be554 5853static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 5854{
f6dbf62a 5855 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
5856 bool be = s->be_data == MO_BE;
5857 bool mte = s->mte_active[0];
f6dbf62a
RH
5858
5859 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5860 return false;
5861 }
1402a6b8
RH
5862 if (!dc_isar_feature(aa64_sve, s)) {
5863 return false;
5864 }
765ff97d 5865 s->is_nonstreaming = true;
f6dbf62a
RH
5866 if (!sve_access_check(s)) {
5867 return true;
5868 }
5869 switch (a->esz) {
5870 case MO_32:
d28d12f0 5871 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
5872 break;
5873 case MO_64:
d28d12f0 5874 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
5875 break;
5876 default:
5877 g_assert_not_reached();
5878 }
5879 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5880 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
5881 return true;
5882}
dec6cf6b 5883
3a7be554 5884static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
5885{
5886 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5887 bool be = s->be_data == MO_BE;
5888 bool mte = s->mte_active[0];
408ecde9
RH
5889
5890 if (a->esz < a->msz) {
5891 return false;
5892 }
1402a6b8
RH
5893 if (!dc_isar_feature(aa64_sve, s)) {
5894 return false;
5895 }
765ff97d 5896 s->is_nonstreaming = true;
408ecde9
RH
5897 if (!sve_access_check(s)) {
5898 return true;
5899 }
5900
5901 switch (a->esz) {
5902 case MO_32:
d28d12f0 5903 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
5904 break;
5905 case MO_64:
d28d12f0 5906 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
5907 break;
5908 }
5909 assert(fn != NULL);
5910
5911 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5912 * by loading the immediate into the scalar parameter.
5913 */
2ccdf94f
RH
5914 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5915 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
5916 return true;
5917}
5918
6ebca45f
SL
5919static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
5920{
b17ab470
RH
5921 gen_helper_gvec_mem_scatter *fn;
5922 bool be = s->be_data == MO_BE;
5923 bool mte = s->mte_active[0];
5924
5925 if (a->esz < a->msz) {
5926 return false;
5927 }
6ebca45f
SL
5928 if (!dc_isar_feature(aa64_sve2, s)) {
5929 return false;
5930 }
765ff97d 5931 s->is_nonstreaming = true;
b17ab470
RH
5932 if (!sve_access_check(s)) {
5933 return true;
5934 }
5935
5936 switch (a->esz) {
5937 case MO_32:
5938 fn = scatter_store_fn32[mte][be][0][a->msz];
5939 break;
5940 case MO_64:
5941 fn = scatter_store_fn64[mte][be][2][a->msz];
5942 break;
5943 default:
5944 g_assert_not_reached();
5945 }
5946
5947 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5948 cpu_reg(s, a->rm), a->msz, true, fn);
5949 return true;
6ebca45f
SL
5950}
5951
dec6cf6b
RH
5952/*
5953 * Prefetches
5954 */
5955
3a7be554 5956static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b 5957{
1402a6b8
RH
5958 if (!dc_isar_feature(aa64_sve, s)) {
5959 return false;
5960 }
dec6cf6b 5961 /* Prefetch is a nop within QEMU. */
2f95a3b0 5962 (void)sve_access_check(s);
dec6cf6b
RH
5963 return true;
5964}
5965
3a7be554 5966static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b 5967{
1402a6b8 5968 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
dec6cf6b
RH
5969 return false;
5970 }
5971 /* Prefetch is a nop within QEMU. */
2f95a3b0 5972 (void)sve_access_check(s);
dec6cf6b
RH
5973 return true;
5974}
a2103582 5975
e1d1a643
RH
5976static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
5977{
5978 if (!dc_isar_feature(aa64_sve, s)) {
5979 return false;
5980 }
5981 /* Prefetch is a nop within QEMU. */
5982 s->is_nonstreaming = true;
5983 (void)sve_access_check(s);
5984 return true;
5985}
5986
a2103582
RH
5987/*
5988 * Move Prefix
5989 *
5990 * TODO: The implementation so far could handle predicated merging movprfx.
5991 * The helper functions as written take an extra source register to
5992 * use in the operation, but the result is only written when predication
5993 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5994 * to allow the final write back to the destination to be unconditional.
5995 * For predicated zeroing movprfx, we need to rearrange the helpers to
5996 * allow the final write back to zero inactives.
5997 *
5998 * In the meantime, just emit the moves.
5999 */
6000
4b0b37e9
RH
6001TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
6002TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
6003TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
5dad1ba5
RH
6004
6005/*
6006 * SVE2 Integer Multiply - Unpredicated
6007 */
6008
b262215b 6009TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
5dad1ba5 6010
bd394cf5
RH
6011static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
6012 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
6013 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
6014};
6015TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6016 smulh_zzz_fns[a->esz], a, 0)
5dad1ba5 6017
bd394cf5
RH
6018static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
6019 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
6020 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
6021};
6022TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6023 umulh_zzz_fns[a->esz], a, 0)
5dad1ba5 6024
bd394cf5
RH
6025TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6026 gen_helper_gvec_pmul_b, a, 0)
5dad1ba5 6027
bd394cf5
RH
6028static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
6029 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
6030 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
6031};
6032TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6033 sqdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6034
bd394cf5
RH
6035static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
6036 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
6037 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
6038};
6039TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6040 sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6041
d4b1e59d
RH
6042/*
6043 * SVE2 Integer - Predicated
6044 */
6045
5880bdc0
RH
6046static gen_helper_gvec_4 * const sadlp_fns[4] = {
6047 NULL, gen_helper_sve2_sadalp_zpzz_h,
6048 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
6049};
6050TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
6051 sadlp_fns[a->esz], a, 0)
d4b1e59d 6052
5880bdc0
RH
6053static gen_helper_gvec_4 * const uadlp_fns[4] = {
6054 NULL, gen_helper_sve2_uadalp_zpzz_h,
6055 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
6056};
6057TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
6058 uadlp_fns[a->esz], a, 0)
db366da8
RH
6059
6060/*
6061 * SVE2 integer unary operations (predicated)
6062 */
6063
b2c00961
RH
6064TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
6065 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)
db366da8 6066
b2c00961
RH
6067TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
6068 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
db366da8 6069
b2c00961
RH
6070static gen_helper_gvec_3 * const sqabs_fns[4] = {
6071 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
6072 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
6073};
6074TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)
db366da8 6075
b2c00961
RH
6076static gen_helper_gvec_3 * const sqneg_fns[4] = {
6077 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
6078 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
6079};
6080TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
45d9503d 6081
5880bdc0
RH
6082DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
6083DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
6084DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)
45d9503d 6085
5880bdc0
RH
6086DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
6087DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
6088DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)
a47dc220 6089
5880bdc0
RH
6090DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
6091DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
6092DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)
a47dc220 6093
5880bdc0
RH
6094DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
6095DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
6096DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)
8597dc8b 6097
5880bdc0
RH
6098DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
6099DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
6100DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
6101DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
6102DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)
4f07fbeb 6103
5880bdc0
RH
6104DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
6105DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
6106DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
6107DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
6108DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
6109DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
0ce1dda8
RH
6110
6111/*
6112 * SVE2 Widening Integer Arithmetic
6113 */
6114
615f19fe
RH
6115static gen_helper_gvec_3 * const saddl_fns[4] = {
6116 NULL, gen_helper_sve2_saddl_h,
6117 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
6118};
6119TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6120 saddl_fns[a->esz], a, 0)
6121TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6122 saddl_fns[a->esz], a, 3)
6123TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
6124 saddl_fns[a->esz], a, 2)
6125
6126static gen_helper_gvec_3 * const ssubl_fns[4] = {
6127 NULL, gen_helper_sve2_ssubl_h,
6128 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
6129};
6130TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6131 ssubl_fns[a->esz], a, 0)
6132TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6133 ssubl_fns[a->esz], a, 3)
6134TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
6135 ssubl_fns[a->esz], a, 2)
6136TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
6137 ssubl_fns[a->esz], a, 1)
6138
6139static gen_helper_gvec_3 * const sabdl_fns[4] = {
6140 NULL, gen_helper_sve2_sabdl_h,
6141 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
6142};
6143TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6144 sabdl_fns[a->esz], a, 0)
6145TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6146 sabdl_fns[a->esz], a, 3)
6147
6148static gen_helper_gvec_3 * const uaddl_fns[4] = {
6149 NULL, gen_helper_sve2_uaddl_h,
6150 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
6151};
6152TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6153 uaddl_fns[a->esz], a, 0)
6154TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6155 uaddl_fns[a->esz], a, 3)
6156
6157static gen_helper_gvec_3 * const usubl_fns[4] = {
6158 NULL, gen_helper_sve2_usubl_h,
6159 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
6160};
6161TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6162 usubl_fns[a->esz], a, 0)
6163TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6164 usubl_fns[a->esz], a, 3)
6165
6166static gen_helper_gvec_3 * const uabdl_fns[4] = {
6167 NULL, gen_helper_sve2_uabdl_h,
6168 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
6169};
6170TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6171 uabdl_fns[a->esz], a, 0)
6172TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6173 uabdl_fns[a->esz], a, 3)
6174
6175static gen_helper_gvec_3 * const sqdmull_fns[4] = {
6176 NULL, gen_helper_sve2_sqdmull_zzz_h,
6177 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
6178};
6179TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6180 sqdmull_fns[a->esz], a, 0)
6181TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6182 sqdmull_fns[a->esz], a, 3)
6183
6184static gen_helper_gvec_3 * const smull_fns[4] = {
6185 NULL, gen_helper_sve2_smull_zzz_h,
6186 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
6187};
6188TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6189 smull_fns[a->esz], a, 0)
6190TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6191 smull_fns[a->esz], a, 3)
6192
6193static gen_helper_gvec_3 * const umull_fns[4] = {
6194 NULL, gen_helper_sve2_umull_zzz_h,
6195 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
6196};
6197TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6198 umull_fns[a->esz], a, 0)
6199TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6200 umull_fns[a->esz], a, 3)
6201
6202static gen_helper_gvec_3 * const eoril_fns[4] = {
6203 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6204 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6205};
6206TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
6207TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
2df3ca55 6208
e3a56131
RH
6209static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6210{
6211 static gen_helper_gvec_3 * const fns[4] = {
6212 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6213 NULL, gen_helper_sve2_pmull_d,
6214 };
4464ee36
RH
6215
6216 if (a->esz == 0) {
6217 if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
6218 return false;
6219 }
6220 s->is_nonstreaming = true;
6221 } else if (!dc_isar_feature(aa64_sve, s)) {
e3a56131
RH
6222 return false;
6223 }
615f19fe 6224 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
e3a56131
RH
6225}
6226
615f19fe
RH
6227TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
6228TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
e3a56131 6229
615f19fe
RH
6230static gen_helper_gvec_3 * const saddw_fns[4] = {
6231 NULL, gen_helper_sve2_saddw_h,
6232 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
6233};
6234TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
6235TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
e3a56131 6236
615f19fe
RH
6237static gen_helper_gvec_3 * const ssubw_fns[4] = {
6238 NULL, gen_helper_sve2_ssubw_h,
6239 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
6240};
6241TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
6242TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
81fccf09 6243
615f19fe
RH
6244static gen_helper_gvec_3 * const uaddw_fns[4] = {
6245 NULL, gen_helper_sve2_uaddw_h,
6246 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
6247};
6248TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
6249TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
81fccf09 6250
615f19fe
RH
6251static gen_helper_gvec_3 * const usubw_fns[4] = {
6252 NULL, gen_helper_sve2_usubw_h,
6253 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
6254};
6255TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
6256TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
4269fef1
RH
6257
6258static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6259{
6260 int top = imm & 1;
6261 int shl = imm >> 1;
6262 int halfbits = 4 << vece;
6263
6264 if (top) {
6265 if (shl == halfbits) {
6266 TCGv_vec t = tcg_temp_new_vec_matching(d);
6267 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6268 tcg_gen_and_vec(vece, d, n, t);
6269 tcg_temp_free_vec(t);
6270 } else {
6271 tcg_gen_sari_vec(vece, d, n, halfbits);
6272 tcg_gen_shli_vec(vece, d, d, shl);
6273 }
6274 } else {
6275 tcg_gen_shli_vec(vece, d, n, halfbits);
6276 tcg_gen_sari_vec(vece, d, d, halfbits - shl);
6277 }
6278}
6279
6280static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
6281{
6282 int halfbits = 4 << vece;
6283 int top = imm & 1;
6284 int shl = (imm >> 1);
6285 int shift;
6286 uint64_t mask;
6287
6288 mask = MAKE_64BIT_MASK(0, halfbits);
6289 mask <<= shl;
6290 mask = dup_const(vece, mask);
6291
6292 shift = shl - top * halfbits;
6293 if (shift < 0) {
6294 tcg_gen_shri_i64(d, n, -shift);
6295 } else {
6296 tcg_gen_shli_i64(d, n, shift);
6297 }
6298 tcg_gen_andi_i64(d, d, mask);
6299}
6300
6301static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6302{
6303 gen_ushll_i64(MO_16, d, n, imm);
6304}
6305
6306static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6307{
6308 gen_ushll_i64(MO_32, d, n, imm);
6309}
6310
6311static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6312{
6313 gen_ushll_i64(MO_64, d, n, imm);
6314}
6315
6316static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6317{
6318 int halfbits = 4 << vece;
6319 int top = imm & 1;
6320 int shl = imm >> 1;
6321
6322 if (top) {
6323 if (shl == halfbits) {
6324 TCGv_vec t = tcg_temp_new_vec_matching(d);
6325 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6326 tcg_gen_and_vec(vece, d, n, t);
6327 tcg_temp_free_vec(t);
6328 } else {
6329 tcg_gen_shri_vec(vece, d, n, halfbits);
6330 tcg_gen_shli_vec(vece, d, d, shl);
6331 }
6332 } else {
6333 if (shl == 0) {
6334 TCGv_vec t = tcg_temp_new_vec_matching(d);
6335 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6336 tcg_gen_and_vec(vece, d, n, t);
6337 tcg_temp_free_vec(t);
6338 } else {
6339 tcg_gen_shli_vec(vece, d, n, halfbits);
6340 tcg_gen_shri_vec(vece, d, d, halfbits - shl);
6341 }
6342 }
6343}
6344
5a528bb5
RH
6345static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
6346 const GVecGen2i ops[3], bool sel)
4269fef1 6347{
4269fef1 6348
5a528bb5 6349 if (a->esz < 0 || a->esz > 2) {
4269fef1
RH
6350 return false;
6351 }
6352 if (sve_access_check(s)) {
6353 unsigned vsz = vec_full_reg_size(s);
6354 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6355 vec_full_reg_offset(s, a->rn),
6356 vsz, vsz, (a->imm << 1) | sel,
5a528bb5 6357 &ops[a->esz]);
4269fef1
RH
6358 }
6359 return true;
6360}
6361
5a528bb5
RH
6362static const TCGOpcode sshll_list[] = {
6363 INDEX_op_shli_vec, INDEX_op_sari_vec, 0
6364};
6365static const GVecGen2i sshll_ops[3] = {
6366 { .fniv = gen_sshll_vec,
6367 .opt_opc = sshll_list,
6368 .fno = gen_helper_sve2_sshll_h,
6369 .vece = MO_16 },
6370 { .fniv = gen_sshll_vec,
6371 .opt_opc = sshll_list,
6372 .fno = gen_helper_sve2_sshll_s,
6373 .vece = MO_32 },
6374 { .fniv = gen_sshll_vec,
6375 .opt_opc = sshll_list,
6376 .fno = gen_helper_sve2_sshll_d,
6377 .vece = MO_64 }
6378};
6379TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
6380TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)
4269fef1 6381
5a528bb5
RH
6382static const TCGOpcode ushll_list[] = {
6383 INDEX_op_shli_vec, INDEX_op_shri_vec, 0
6384};
6385static const GVecGen2i ushll_ops[3] = {
6386 { .fni8 = gen_ushll16_i64,
6387 .fniv = gen_ushll_vec,
6388 .opt_opc = ushll_list,
6389 .fno = gen_helper_sve2_ushll_h,
6390 .vece = MO_16 },
6391 { .fni8 = gen_ushll32_i64,
6392 .fniv = gen_ushll_vec,
6393 .opt_opc = ushll_list,
6394 .fno = gen_helper_sve2_ushll_s,
6395 .vece = MO_32 },
6396 { .fni8 = gen_ushll64_i64,
6397 .fniv = gen_ushll_vec,
6398 .opt_opc = ushll_list,
6399 .fno = gen_helper_sve2_ushll_d,
6400 .vece = MO_64 },
6401};
6402TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
6403TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)
cb9c33b8 6404
615f19fe
RH
6405static gen_helper_gvec_3 * const bext_fns[4] = {
6406 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
6407 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
6408};
ca363d23
RH
6409TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6410 bext_fns[a->esz], a, 0)
ed4a6387 6411
615f19fe
RH
6412static gen_helper_gvec_3 * const bdep_fns[4] = {
6413 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
6414 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
6415};
ca363d23
RH
6416TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6417 bdep_fns[a->esz], a, 0)
ed4a6387 6418
615f19fe
RH
6419static gen_helper_gvec_3 * const bgrp_fns[4] = {
6420 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
6421 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
6422};
ca363d23
RH
6423TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6424 bgrp_fns[a->esz], a, 0)
ed4a6387 6425
615f19fe
RH
6426static gen_helper_gvec_3 * const cadd_fns[4] = {
6427 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
6428 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
6429};
6430TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6431 cadd_fns[a->esz], a, 0)
6432TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6433 cadd_fns[a->esz], a, 1)
6434
6435static gen_helper_gvec_3 * const sqcadd_fns[4] = {
6436 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
6437 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
6438};
6439TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6440 sqcadd_fns[a->esz], a, 0)
6441TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6442 sqcadd_fns[a->esz], a, 1)
38650638 6443
eeb4e84d
RH
6444static gen_helper_gvec_4 * const sabal_fns[4] = {
6445 NULL, gen_helper_sve2_sabal_h,
6446 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
6447};
6448TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
6449TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
38650638 6450
eeb4e84d
RH
6451static gen_helper_gvec_4 * const uabal_fns[4] = {
6452 NULL, gen_helper_sve2_uabal_h,
6453 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
6454};
6455TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
6456TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
b8295dfb
RH
6457
6458static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
6459{
6460 static gen_helper_gvec_4 * const fns[2] = {
6461 gen_helper_sve2_adcl_s,
6462 gen_helper_sve2_adcl_d,
6463 };
6464 /*
6465 * Note that in this case the ESZ field encodes both size and sign.
6466 * Split out 'subtract' into bit 1 of the data field for the helper.
6467 */
eeb4e84d 6468 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
b8295dfb
RH
6469}
6470
eeb4e84d
RH
6471TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
6472TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e 6473
f2be26a5
RH
6474TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
6475TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
6476TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
6477TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
6478TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
6479TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)
289a1797 6480
79828dcb
RH
6481TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
6482TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
5ff2838d 6483
6100d084
RH
6484static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
6485 const GVecGen2 ops[3])
5ff2838d 6486{
6100d084 6487 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
5ff2838d
RH
6488 return false;
6489 }
6490 if (sve_access_check(s)) {
6491 unsigned vsz = vec_full_reg_size(s);
6492 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
6493 vec_full_reg_offset(s, a->rn),
6494 vsz, vsz, &ops[a->esz]);
6495 }
6496 return true;
6497}
6498
6499static const TCGOpcode sqxtn_list[] = {
6500 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
6501};
6502
6503static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6504{
6505 TCGv_vec t = tcg_temp_new_vec_matching(d);
6506 int halfbits = 4 << vece;
6507 int64_t mask = (1ull << halfbits) - 1;
6508 int64_t min = -1ull << (halfbits - 1);
6509 int64_t max = -min - 1;
6510
6511 tcg_gen_dupi_vec(vece, t, min);
6512 tcg_gen_smax_vec(vece, d, n, t);
6513 tcg_gen_dupi_vec(vece, t, max);
6514 tcg_gen_smin_vec(vece, d, d, t);
6515 tcg_gen_dupi_vec(vece, t, mask);
6516 tcg_gen_and_vec(vece, d, d, t);
6517 tcg_temp_free_vec(t);
6518}
6519
6100d084
RH
6520static const GVecGen2 sqxtnb_ops[3] = {
6521 { .fniv = gen_sqxtnb_vec,
6522 .opt_opc = sqxtn_list,
6523 .fno = gen_helper_sve2_sqxtnb_h,
6524 .vece = MO_16 },
6525 { .fniv = gen_sqxtnb_vec,
6526 .opt_opc = sqxtn_list,
6527 .fno = gen_helper_sve2_sqxtnb_s,
6528 .vece = MO_32 },
6529 { .fniv = gen_sqxtnb_vec,
6530 .opt_opc = sqxtn_list,
6531 .fno = gen_helper_sve2_sqxtnb_d,
6532 .vece = MO_64 },
6533};
6534TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
5ff2838d
RH
6535
6536static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6537{
6538 TCGv_vec t = tcg_temp_new_vec_matching(d);
6539 int halfbits = 4 << vece;
6540 int64_t mask = (1ull << halfbits) - 1;
6541 int64_t min = -1ull << (halfbits - 1);
6542 int64_t max = -min - 1;
6543
6544 tcg_gen_dupi_vec(vece, t, min);
6545 tcg_gen_smax_vec(vece, n, n, t);
6546 tcg_gen_dupi_vec(vece, t, max);
6547 tcg_gen_smin_vec(vece, n, n, t);
6548 tcg_gen_shli_vec(vece, n, n, halfbits);
6549 tcg_gen_dupi_vec(vece, t, mask);
6550 tcg_gen_bitsel_vec(vece, d, t, d, n);
6551 tcg_temp_free_vec(t);
6552}
6553
6100d084
RH
6554static const GVecGen2 sqxtnt_ops[3] = {
6555 { .fniv = gen_sqxtnt_vec,
6556 .opt_opc = sqxtn_list,
6557 .load_dest = true,
6558 .fno = gen_helper_sve2_sqxtnt_h,
6559 .vece = MO_16 },
6560 { .fniv = gen_sqxtnt_vec,
6561 .opt_opc = sqxtn_list,
6562 .load_dest = true,
6563 .fno = gen_helper_sve2_sqxtnt_s,
6564 .vece = MO_32 },
6565 { .fniv = gen_sqxtnt_vec,
6566 .opt_opc = sqxtn_list,
6567 .load_dest = true,
6568 .fno = gen_helper_sve2_sqxtnt_d,
6569 .vece = MO_64 },
6570};
6571TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)
5ff2838d
RH
6572
6573static const TCGOpcode uqxtn_list[] = {
6574 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
6575};
6576
6577static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6578{
6579 TCGv_vec t = tcg_temp_new_vec_matching(d);
6580 int halfbits = 4 << vece;
6581 int64_t max = (1ull << halfbits) - 1;
6582
6583 tcg_gen_dupi_vec(vece, t, max);
6584 tcg_gen_umin_vec(vece, d, n, t);
6585 tcg_temp_free_vec(t);
6586}
6587
6100d084
RH
6588static const GVecGen2 uqxtnb_ops[3] = {
6589 { .fniv = gen_uqxtnb_vec,
6590 .opt_opc = uqxtn_list,
6591 .fno = gen_helper_sve2_uqxtnb_h,
6592 .vece = MO_16 },
6593 { .fniv = gen_uqxtnb_vec,
6594 .opt_opc = uqxtn_list,
6595 .fno = gen_helper_sve2_uqxtnb_s,
6596 .vece = MO_32 },
6597 { .fniv = gen_uqxtnb_vec,
6598 .opt_opc = uqxtn_list,
6599 .fno = gen_helper_sve2_uqxtnb_d,
6600 .vece = MO_64 },
6601};
6602TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
5ff2838d
RH
6603
6604static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6605{
6606 TCGv_vec t = tcg_temp_new_vec_matching(d);
6607 int halfbits = 4 << vece;
6608 int64_t max = (1ull << halfbits) - 1;
6609
6610 tcg_gen_dupi_vec(vece, t, max);
6611 tcg_gen_umin_vec(vece, n, n, t);
6612 tcg_gen_shli_vec(vece, n, n, halfbits);
6613 tcg_gen_bitsel_vec(vece, d, t, d, n);
6614 tcg_temp_free_vec(t);
6615}
6616
6100d084
RH
6617static const GVecGen2 uqxtnt_ops[3] = {
6618 { .fniv = gen_uqxtnt_vec,
6619 .opt_opc = uqxtn_list,
6620 .load_dest = true,
6621 .fno = gen_helper_sve2_uqxtnt_h,
6622 .vece = MO_16 },
6623 { .fniv = gen_uqxtnt_vec,
6624 .opt_opc = uqxtn_list,
6625 .load_dest = true,
6626 .fno = gen_helper_sve2_uqxtnt_s,
6627 .vece = MO_32 },
6628 { .fniv = gen_uqxtnt_vec,
6629 .opt_opc = uqxtn_list,
6630 .load_dest = true,
6631 .fno = gen_helper_sve2_uqxtnt_d,
6632 .vece = MO_64 },
6633};
6634TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)
5ff2838d
RH
6635
6636static const TCGOpcode sqxtun_list[] = {
6637 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
6638};
6639
6640static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6641{
6642 TCGv_vec t = tcg_temp_new_vec_matching(d);
6643 int halfbits = 4 << vece;
6644 int64_t max = (1ull << halfbits) - 1;
6645
6646 tcg_gen_dupi_vec(vece, t, 0);
6647 tcg_gen_smax_vec(vece, d, n, t);
6648 tcg_gen_dupi_vec(vece, t, max);
6649 tcg_gen_umin_vec(vece, d, d, t);
6650 tcg_temp_free_vec(t);
6651}
6652
6100d084
RH
6653static const GVecGen2 sqxtunb_ops[3] = {
6654 { .fniv = gen_sqxtunb_vec,
6655 .opt_opc = sqxtun_list,
6656 .fno = gen_helper_sve2_sqxtunb_h,
6657 .vece = MO_16 },
6658 { .fniv = gen_sqxtunb_vec,
6659 .opt_opc = sqxtun_list,
6660 .fno = gen_helper_sve2_sqxtunb_s,
6661 .vece = MO_32 },
6662 { .fniv = gen_sqxtunb_vec,
6663 .opt_opc = sqxtun_list,
6664 .fno = gen_helper_sve2_sqxtunb_d,
6665 .vece = MO_64 },
6666};
6667TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
5ff2838d
RH
6668
6669static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6670{
6671 TCGv_vec t = tcg_temp_new_vec_matching(d);
6672 int halfbits = 4 << vece;
6673 int64_t max = (1ull << halfbits) - 1;
6674
6675 tcg_gen_dupi_vec(vece, t, 0);
6676 tcg_gen_smax_vec(vece, n, n, t);
6677 tcg_gen_dupi_vec(vece, t, max);
6678 tcg_gen_umin_vec(vece, n, n, t);
6679 tcg_gen_shli_vec(vece, n, n, halfbits);
6680 tcg_gen_bitsel_vec(vece, d, t, d, n);
6681 tcg_temp_free_vec(t);
6682}
6683
6100d084
RH
6684static const GVecGen2 sqxtunt_ops[3] = {
6685 { .fniv = gen_sqxtunt_vec,
6686 .opt_opc = sqxtun_list,
6687 .load_dest = true,
6688 .fno = gen_helper_sve2_sqxtunt_h,
6689 .vece = MO_16 },
6690 { .fniv = gen_sqxtunt_vec,
6691 .opt_opc = sqxtun_list,
6692 .load_dest = true,
6693 .fno = gen_helper_sve2_sqxtunt_s,
6694 .vece = MO_32 },
6695 { .fniv = gen_sqxtunt_vec,
6696 .opt_opc = sqxtun_list,
6697 .load_dest = true,
6698 .fno = gen_helper_sve2_sqxtunt_d,
6699 .vece = MO_64 },
6700};
6701TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)
46d111b2 6702
f7f2f0fa
RH
6703static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
6704 const GVecGen2i ops[3])
46d111b2 6705{
f7f2f0fa 6706 if (a->esz < 0 || a->esz > MO_32) {
46d111b2
RH
6707 return false;
6708 }
6709 assert(a->imm > 0 && a->imm <= (8 << a->esz));
6710 if (sve_access_check(s)) {
6711 unsigned vsz = vec_full_reg_size(s);
6712 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6713 vec_full_reg_offset(s, a->rn),
6714 vsz, vsz, a->imm, &ops[a->esz]);
6715 }
6716 return true;
6717}
6718
6719static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6720{
6721 int halfbits = 4 << vece;
6722 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6723
6724 tcg_gen_shri_i64(d, n, shr);
6725 tcg_gen_andi_i64(d, d, mask);
6726}
6727
/* Element-size specializations of gen_shrnb_i64, used as .fni8 callbacks. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}
6742
6743static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
6744{
6745 TCGv_vec t = tcg_temp_new_vec_matching(d);
6746 int halfbits = 4 << vece;
6747 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
6748
6749 tcg_gen_shri_vec(vece, n, n, shr);
6750 tcg_gen_dupi_vec(vece, t, mask);
6751 tcg_gen_and_vec(vece, d, n, t);
6752 tcg_temp_free_vec(t);
6753}
6754
/* SHRNB: shift right and narrow to the bottom half of each element. */
static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
static const GVecGen2i shrnb_ops[3] = {
    { .fni8 = gen_shrnb16_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnb32_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnb64_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)
46d111b2
RH
6774
6775static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6776{
6777 int halfbits = 4 << vece;
6778 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6779
6780 tcg_gen_shli_i64(n, n, halfbits - shr);
6781 tcg_gen_andi_i64(n, n, ~mask);
6782 tcg_gen_andi_i64(d, d, mask);
6783 tcg_gen_or_i64(d, d, n);
6784}
6785
/* Element-size specializations of gen_shrnt_i64, used as .fni8 callbacks. */
static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/* For MO_64 there is exactly one element per lane, so a deposit into the
 * high 32 bits expresses the top-half insert directly. */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}
6801
6802static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
6803{
6804 TCGv_vec t = tcg_temp_new_vec_matching(d);
6805 int halfbits = 4 << vece;
6806 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
6807
6808 tcg_gen_shli_vec(vece, n, n, halfbits - shr);
6809 tcg_gen_dupi_vec(vece, t, mask);
6810 tcg_gen_bitsel_vec(vece, d, t, d, n);
6811 tcg_temp_free_vec(t);
6812}
6813
/* SHRNT: shift right and narrow to the top half of each element.
 * .load_dest because the bottom halves of the destination are kept. */
static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
static const GVecGen2i shrnt_ops[3] = {
    { .fni8 = gen_shrnt16_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnt32_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnt64_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)
46d111b2 6836
/* Rounding narrows have no inline expansion: only out-of-line helpers. */
static const GVecGen2i rshrnb_ops[3] = {
    { .fno = gen_helper_sve2_rshrnb_h },
    { .fno = gen_helper_sve2_rshrnb_s },
    { .fno = gen_helper_sve2_rshrnb_d },
};
TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)

static const GVecGen2i rshrnt_ops[3] = {
    { .fno = gen_helper_sve2_rshrnt_h },
    { .fno = gen_helper_sve2_rshrnt_s },
    { .fno = gen_helper_sve2_rshrnt_d },
};
TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
81fd3e6e
RH
6850
6851static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
6852 TCGv_vec n, int64_t shr)
6853{
6854 TCGv_vec t = tcg_temp_new_vec_matching(d);
6855 int halfbits = 4 << vece;
6856
6857 tcg_gen_sari_vec(vece, n, n, shr);
6858 tcg_gen_dupi_vec(vece, t, 0);
6859 tcg_gen_smax_vec(vece, n, n, t);
6860 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6861 tcg_gen_umin_vec(vece, d, n, t);
6862 tcg_temp_free_vec(t);
6863}
6864
/* SQSHRUNB: signed saturating shift right unsigned narrow (bottom). */
static const TCGOpcode sqshrunb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunb_ops[3] = {
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
81fd3e6e
RH
6883
6884static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
6885 TCGv_vec n, int64_t shr)
6886{
6887 TCGv_vec t = tcg_temp_new_vec_matching(d);
6888 int halfbits = 4 << vece;
6889
6890 tcg_gen_sari_vec(vece, n, n, shr);
6891 tcg_gen_dupi_vec(vece, t, 0);
6892 tcg_gen_smax_vec(vece, n, n, t);
6893 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6894 tcg_gen_umin_vec(vece, n, n, t);
6895 tcg_gen_shli_vec(vece, n, n, halfbits);
6896 tcg_gen_bitsel_vec(vece, d, t, d, n);
6897 tcg_temp_free_vec(t);
6898}
6899
/* SQSHRUNT: top-half variant; .load_dest because the existing bottom
 * halves of the destination must be read and preserved. */
static const TCGOpcode sqshrunt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunt_ops[3] = {
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)
81fd3e6e 6922
/* Rounding forms: helper-only, no inline vector expansion. */
static const GVecGen2i sqrshrunb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunb_h },
    { .fno = gen_helper_sve2_sqrshrunb_s },
    { .fno = gen_helper_sve2_sqrshrunb_d },
};
TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)

static const GVecGen2i sqrshrunt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunt_h },
    { .fno = gen_helper_sve2_sqrshrunt_s },
    { .fno = gen_helper_sve2_sqrshrunt_d },
};
TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
c13418da 6936
743bb147
RH
6937static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
6938 TCGv_vec n, int64_t shr)
6939{
6940 TCGv_vec t = tcg_temp_new_vec_matching(d);
6941 int halfbits = 4 << vece;
6942 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6943 int64_t min = -max - 1;
6944
6945 tcg_gen_sari_vec(vece, n, n, shr);
6946 tcg_gen_dupi_vec(vece, t, min);
6947 tcg_gen_smax_vec(vece, n, n, t);
6948 tcg_gen_dupi_vec(vece, t, max);
6949 tcg_gen_smin_vec(vece, n, n, t);
6950 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6951 tcg_gen_and_vec(vece, d, n, t);
6952 tcg_temp_free_vec(t);
6953}
6954
/* SQSHRNB: signed saturating shift right narrow (bottom). */
static const TCGOpcode sqshrnb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnb_ops[3] = {
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
743bb147
RH
6973
6974static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
6975 TCGv_vec n, int64_t shr)
6976{
6977 TCGv_vec t = tcg_temp_new_vec_matching(d);
6978 int halfbits = 4 << vece;
6979 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6980 int64_t min = -max - 1;
6981
6982 tcg_gen_sari_vec(vece, n, n, shr);
6983 tcg_gen_dupi_vec(vece, t, min);
6984 tcg_gen_smax_vec(vece, n, n, t);
6985 tcg_gen_dupi_vec(vece, t, max);
6986 tcg_gen_smin_vec(vece, n, n, t);
6987 tcg_gen_shli_vec(vece, n, n, halfbits);
6988 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6989 tcg_gen_bitsel_vec(vece, d, t, d, n);
6990 tcg_temp_free_vec(t);
6991}
6992
/* SQSHRNT: top-half variant; .load_dest preserves the bottom halves. */
static const TCGOpcode sqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnt_ops[3] = {
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)
743bb147 7015
/* Rounding forms: helper-only, no inline vector expansion. */
static const GVecGen2i sqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnb_h },
    { .fno = gen_helper_sve2_sqrshrnb_s },
    { .fno = gen_helper_sve2_sqrshrnb_d },
};
TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)

static const GVecGen2i sqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnt_h },
    { .fno = gen_helper_sve2_sqrshrnt_s },
    { .fno = gen_helper_sve2_sqrshrnt_d },
};
TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
743bb147 7029
c13418da
RH
7030static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
7031 TCGv_vec n, int64_t shr)
7032{
7033 TCGv_vec t = tcg_temp_new_vec_matching(d);
7034 int halfbits = 4 << vece;
7035
7036 tcg_gen_shri_vec(vece, n, n, shr);
7037 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7038 tcg_gen_umin_vec(vece, d, n, t);
7039 tcg_temp_free_vec(t);
7040}
7041
/* UQSHRNB: unsigned saturating shift right narrow (bottom). */
static const TCGOpcode uqshrnb_vec_list[] = {
    INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnb_ops[3] = {
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
c13418da
RH
7060
7061static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
7062 TCGv_vec n, int64_t shr)
7063{
7064 TCGv_vec t = tcg_temp_new_vec_matching(d);
7065 int halfbits = 4 << vece;
7066
7067 tcg_gen_shri_vec(vece, n, n, shr);
7068 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7069 tcg_gen_umin_vec(vece, n, n, t);
7070 tcg_gen_shli_vec(vece, n, n, halfbits);
7071 tcg_gen_bitsel_vec(vece, d, t, d, n);
7072 tcg_temp_free_vec(t);
7073}
7074
/* UQSHRNT: top-half variant; .load_dest preserves the bottom halves. */
static const TCGOpcode uqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnt_ops[3] = {
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)
c13418da 7096
/* Rounding forms: helper-only, no inline vector expansion. */
static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)
b87dbeeb 7110
/*
 * SVE2 narrowing three-register operations.  The byte slot is NULL:
 * these are defined only for halfword, word and doubleword elements.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = { \
        NULL, gen_helper_sve2_##name##_h, \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
    }; \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
               name##_fns[a->esz], a, 0)

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
c3cd6766 7128
/* MATCH/NMATCH support only byte and halfword elements (NULL for s/d),
 * and are registered as non-streaming. */
static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
e0ae6ec3 7138
/* HISTCNT supports only word and doubleword elements. */
static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
                        histcnt_fns[a->esz], a, 0)

/* HISTSEG is defined only for byte elements (esz == 0). */
TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
                        a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7d47ac94 7147
/* SVE2 floating-point pairwise operations, via the DO_ZPZZ_FP pattern. */
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
bfc9307e
RH
/*
 * SVE Integer Multiply-Add (unpredicated)
 */

/* Floating-point matrix multiply-accumulate; non-streaming only. */
TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
4f26756b 7164
/*
 * Widening multiply-accumulate.  Each fns[] table is indexed by esz
 * (no byte form, hence the leading NULL); the trailing immediate to
 * TRANS_FEAT distinguishes the B, T and BT operand selections within
 * the shared helper.
 */
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

/* SQRDMLAH/SQRDMLSH are non-widening and support all element sizes. */
static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)
d782d3ca 7236
/* Complex multiply-add; the rotation is passed through as helper data. */
static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* CDOT is defined only for word and doubleword accumulators. */
static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
6a98cb2a 7256
/* USDOT is valid only for esz == 2; other sizes reject via NULL. */
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
b2bcd1be 7259
/* SVE2 crypto extensions; all registered as non-streaming. */
TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)

/* AESE and AESD share one helper; the final argument selects decrypt. */
TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, false)
TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, true)

TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4ekey, a, 0)

TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
                        gen_gvec_rax1, a)
5c1b7226 7275
/* Narrowing (FCVTNT) and widening (FCVTLT) FP conversions. */
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)

/* FCVTX/FCVTXNT convert double to single using round-to-odd. */
TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve2_fcvtnt_ds)
631be02e 7293
/* FLOGB has no byte form; the half-precision form uses the FP16 status. */
static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL, gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
50d102bd
SL
7300
7301static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
7302{
41bf9b67
RH
7303 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
7304 a->rd, a->rn, a->rm, a->ra,
7305 (sel << 1) | sub, cpu_env);
50d102bd
SL
7306}
7307
/* Arguments to do_FMLAL_zzzw are (sub, sel). */
TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)
50d102bd
SL
7312
7313static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
7314{
41bf9b67
RH
7315 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
7316 a->rd, a->rn, a->rm, a->ra,
7317 (a->index << 2) | (sel << 1) | sub, cpu_env);
50d102bd
SL
7318}
7319
/* Arguments to do_FMLAL_zzxw are (sub, sel). */
TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
2323c5ff 7324
/* Int8 matrix multiply-accumulate; non-streaming only. */
TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)
cb8657f7 7331
/* BFloat16 dot product, vector and indexed forms. */
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

/* BFloat16 matrix multiply-accumulate; non-streaming only. */
TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)
5693887f
RH
7339
7340static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7341{
41bf9b67
RH
7342 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
7343 a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
5693887f
RH
7344}
7345
/* The trailing argument to do_BFMLAL_zzzw is sel (bottom/top). */
TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)
458d0ab6
RH
7348
7349static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
7350{
41bf9b67
RH
7351 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
7352 a->rd, a->rn, a->rm, a->ra,
7353 (a->index << 1) | sel, FPST_FPCR);
458d0ab6
RH
7354}
7355
/* The trailing argument to do_BFMLAL_zzxw is sel (bottom/top). */
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)