]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Implement PSEL
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    /* Highest set bit of tsz selects the element size; -1 if tsz == 0. */
    return 31 - clz32(x);
}
61
/* Decode the shift amount for a right shift from the tsz:imm3 field. */
static int tszimm_shr(DisasContext *s, int x)
{
    /* Right shift encodes (2 * esize) - x; range 1 .. esize. */
    return (16 << tszimm_esz(s, x)) - x;
}
66
/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    /* Left shift encodes x - esize; range 0 .. esize - 1. */
    return x - (8 << tszimm_esz(s, x));
}
72
/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    /* Sign-extend the low byte, optionally shifted left by 8. */
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}
78
/* As expand_imm_sh8s, but with an unsigned 8-bit immediate. */
static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
83
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    /* Diagonal of the dtype table: element size == memory size, unsigned. */
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
/* Invoke an out-of-line helper on 2 Zregs.
 * Returns false (unallocated encoding) iff fn is NULL, so callers can
 * pass a NULL entry from an esz-indexed table directly.
 */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}
118
/* Invoke an out-of-line helper on 2 Zregs, plus float_status of the
 * requested flavour.  The status pointer is freed after emission.
 */
static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
137
138static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
139 arg_rr_esz *a, int data)
140{
141 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
142 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
143}
144
/* Invoke an out-of-line helper on 3 Zregs.
 * Returns false (unallocated encoding) iff fn is NULL.
 */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}
161
84a272f5
RH
162static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
163 arg_rrr_esz *a, int data)
164{
165 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
166}
167
/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}
189
190static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
191 arg_rrr_esz *a, int data)
192{
193 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
194 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
195}
196
/* Invoke an out-of-line helper on 4 Zregs.
 * Returns false (unallocated encoding) iff fn is NULL.
 */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}
214
cab79ac9
RH
215static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
216 arg_rrrr_esz *a, int data)
217{
218 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
219}
220
e82d3536
RH
221static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
222 arg_rrxr_esz *a)
223{
224 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
225}
226
/* Invoke an out-of-line helper on 4 Zregs, plus a pointer.
 * The caller owns ptr; it is not freed here.
 */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}
245
/* As gen_gvec_ptr_zzzz, constructing (and freeing) a float_status
 * pointer of the requested flavour for the helper.
 */
static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    tcg_temp_free_ptr(status);
    return ret;
}
255
/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}
279
/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}
296
b051809a
RH
297static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
298 arg_rpr_esz *a, int data)
299{
300 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
301}
302
afa2529c
RH
303static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
304 arg_rpri_esz *a)
305{
306 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
307}
b051809a 308
/* Invoke an out-of-line helper on 2 Zregs and a predicate, plus
 * float_status of the requested flavour.
 */
static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
328
329static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
330 arg_rpr_esz *a, int data,
331 ARMFPStatusFlavour flavour)
332{
333 return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
334}
335
/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}
f7d79c41 353
312016c9
RH
354static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
355 arg_rprr_esz *a, int data)
356{
357 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
358}
359
/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus fpst. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
381
382static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
383 arg_rprr_esz *a)
384{
385 return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
386 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
387}
388
/* Invoke a vector expander on two Zregs and an immediate. */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}
403
/* Expand a 2-Zreg + immediate expander from arg_rri_esz fields,
 * rejecting the invalid tsz encoding (esz < 0).
 */
static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}
413
/* Invoke a vector expander on three Zregs. */
static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}
429
cd54bbe6
RH
430static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
431 arg_rrr_esz *a)
432{
433 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
434}
435
/* Invoke a vector expander on four Zregs. */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}
452
/* Invoke a vector move on two Zregs.
 * A whole-register copy is element-size agnostic, hence MO_8.
 */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}
463
/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}
470
/* Invoke a vector expander on three Pregs.
 * Predicate operations are bitwise, so MO_64 over the predicate size
 * is always valid.
 */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}
483
/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}
494
/* Set the cpu flags as per a return from an SVE helper.
 * The helper packs N into bit 31(ish), Z into bit 1, C into bit 0.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
503
/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
513
/* PredTest over a multi-word predicate: dofs/gofs are env-relative
 * offsets of the destination and governing predicates.
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
530
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
536
/* Decode-table catch-all: raise an undefined-instruction exception. */
static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}
542
39eea561
RH
/*
 *** SVE Logical - Unpredicated Group
 */

/* All four map directly onto generic gvec bitwise expanders. */
TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
d1822297 551
/* XAR: xor then rotate right by sh in each 8-bit lane, operating on
 * a 64-bit word.  The rotate is synthesized from shifts and masks.
 */
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}
565
/* As gen_xar8_i64, but for 16-bit lanes. */
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}
579
/* XAR on a full 32-bit element: a native rotate suffices. */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}
585
/* XAR on a full 64-bit element. */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}
591
/* XAR using host vector operations, any element size. */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
598
/* Expand XAR (xor-and-rotate-right) across a vector; shared between
 * SVE2 and AdvSIMD, hence the differing immediate ranges below.
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;    /* a rotate by esize is a no-op */

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
641
/* SVE2 XAR: reject invalid tsz encodings and gate on the feature bit. */
static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}
655
/* EOR3: three-way exclusive-or, 64-bit scalar form. */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}
661
/* EOR3: three-way exclusive-or, host vector form. */
static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}
668
/* Expand EOR3 over full vectors; bitwise, so MO_64 is always valid. */
static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)
911cdc6d
RH
683
/* BCAX: d = n ^ (m & ~k), 64-bit scalar form. */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}
689
/* BCAX: d = n ^ (m & ~k), host vector form. */
static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}
696
/* Expand BCAX over full vectors. */
static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)
911cdc6d
RH
711
/* Expand BSL over full vectors. */
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
911cdc6d
RH
720
/* BSL1N: d = (~n & k) | (m & ~k); clobbers n and m as scratch. */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}
727
/* BSL1N, host vector form; uses native bitsel when available. */
static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}
740
/* Expand BSL1N over full vectors. */
static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
911cdc6d
RH
755
/* BSL2N, 64-bit scalar form; clobbers n and m as scratch. */
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       =         | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}
771
/* BSL2N, host vector form; uses native bitsel when available. */
static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}
784
/* Expand BSL2N over full vectors. */
static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
911cdc6d
RH
799
/* NBSL: inverted bitwise select; clobbers n and m as scratch. */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}
806
/* NBSL, host vector form: bitsel then invert. */
static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}
813
/* Expand NBSL over full vectors. */
static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
911cdc6d 828
fea98f9c
RH
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

/* Plain and saturating add/sub map onto generic gvec expanders. */
TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
f97cfd59
RH
840/*
841 *** SVE Integer Arithmetic - Binary Predicated Group
842 */
843
a2103582
RH
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
855
8e7acb24
RH
/* Define the per-esz helper table and translator for a predicated
 * 3-Zreg operation in one go.
 */
#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {               \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,           \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                         \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

/* SDIV/UDIV exist only for 32-bit and 64-bit elements. */
static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
d3fe4a29 898
afac6d04
RH
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

/* Define the per-esz helper table and translator for a predicated
 * unary operation in one go.
 */
#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {              \
        gen_helper_##name##_b, gen_helper_##name##_h,               \
        gen_helper_##name##_s, gen_helper_##name##_d,               \
    };                                                              \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

/* FABS/FNEG have no byte form; sign-extensions only exist for
 * destinations wider than the source.  NULL entries reject those
 * encodings via gen_gvec_ool_arg_zpz.
 */
static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL,                  gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL,                  gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL,                  gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL,                  gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
afac6d04 957
047cec97
RH
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Reduce Zn under predicate Pg into scalar Vd via an out-of-line
 * helper; the 64-bit result is written to the FP/SIMD register.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
993
/* Define the per-esz reduction helper table and translator in one go. */
#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {                \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,             \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV widens to 64 bits, so there is no doubleword form. */
static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ
1018
ccd841c3
RH
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    /* The invert flag is passed to the helper via the simd_data field. */
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}
1036
73c558a8
RH
/*
 * Common expansion for predicated shift-by-immediate.
 * @asr selects arithmetic right shift semantics for the saturation of
 * an out-of-range immediate; @fns is indexed by element size.
 */
static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            /* Zero the active elements of Zd (invert = true). */
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}
1062
5cccd1f1
RH
/* Per-element-size helper tables for the predicated immediate shifts. */
static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

/* ASRD: arithmetic shift right for divide (rounds toward zero). */
static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

/*
 * SVE2 saturating/rounding immediate shifts.  These go straight to
 * gen_gvec_ool_arg_zpzi; an invalid tsz encoding (esz < 0) selects NULL
 * and thus an unallocated encoding.
 */
static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
a5421b54 1121
fe7f8dfb
RH
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/*
 * Predicated shift by wide (doubleword) elements.  No _d form exists,
 * as a doubleword shifted by a doubleword is the ordinary shift.
 */
#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {               \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s, NULL                              \
    };                                                                    \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,              \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1139
d9d78dcc
RH
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

/*
 * Unpredicated shift by immediate, expanded inline via a gvec shift.
 * @asr selects arithmetic right-shift saturation of the immediate.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /*
         * Shift by element size is architecturally valid.  For
         * arithmetic right-shift, it's the same as by one less.
         * Otherwise it is a zeroing operation.
         */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
1170
5e612f80
RH
/* Unpredicated immediate shifts map directly onto gvec inline shifts. */
TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)
d9d78dcc 1174
/* Unpredicated shift by wide elements; again no _d form exists. */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {                \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                      \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1188
96a36e4a
RH
/*
 *** SVE Integer Multiply-Add Group
 */

/*
 * Expand a predicated multiply-add: Zd = op(Za, Zn, Zm) under Pg,
 * via a 5-operand out-of-line helper.
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
1207
dc67e645
RH
/* MLA/MLS helper tables, indexed by element size. */
static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
96a36e4a 1219
9a56c9c3
RH
/*
 *** SVE Index Generation Group
 */

/*
 * Zd.e[i] = start + i * incr.  The doubleword form takes the 64-bit
 * operands directly; narrower element sizes truncate them to 32 bits
 * first, since the helpers only need the low bits.
 */
static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    return true;
}
1262
9aa60c83
RH
/* Four INDEX forms: immediate/register in each operand position. */
TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
9a56c9c3 1271
96f922cc
RH
/*
 *** SVE Stack Allocation Group
 */

/* ADDVL: Xd = Xn + imm * VL (current vector length in bytes). */
static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}
1288
0d935760
RH
1289static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
1290{
1291 if (!dc_isar_feature(aa64_sme, s)) {
1292 return false;
1293 }
1294 if (sme_enabled_check(s)) {
1295 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1296 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1297 tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
1298 }
1299 return true;
1300}
1301
3a7be554 1302static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1303{
1402a6b8
RH
1304 if (!dc_isar_feature(aa64_sve, s)) {
1305 return false;
1306 }
5de56742
AC
1307 if (sve_access_check(s)) {
1308 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1309 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1310 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1311 }
96f922cc
RH
1312 return true;
1313}
1314
0d935760
RH
1315static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
1316{
1317 if (!dc_isar_feature(aa64_sme, s)) {
1318 return false;
1319 }
1320 if (sme_enabled_check(s)) {
1321 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1322 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1323 tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
1324 }
1325 return true;
1326}
1327
3a7be554 1328static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1329{
1402a6b8
RH
1330 if (!dc_isar_feature(aa64_sve, s)) {
1331 return false;
1332 }
5de56742
AC
1333 if (sve_access_check(s)) {
1334 TCGv_i64 reg = cpu_reg(s, a->rd);
1335 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1336 }
96f922cc
RH
1337 return true;
1338}
1339
0d935760
RH
1340static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
1341{
1342 if (!dc_isar_feature(aa64_sme, s)) {
1343 return false;
1344 }
1345 if (sme_enabled_check(s)) {
1346 TCGv_i64 reg = cpu_reg(s, a->rd);
1347 tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
1348 }
1349 return true;
1350}
1351
4b242d9c
RH
/*
 *** SVE Compute Vector Address Group
 */

/* ADR: Zd = Zn + (Zm scaled/extended per variant); imm is the shift. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

/* Non-streaming only: ADR is illegal in SME streaming mode. */
TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
4b242d9c 1365
0762cd42
RH
/*
 *** SVE Integer Misc - Unpredicated Group
 */

/* FEXPA: no byte form; table selects NULL -> unallocated for esz == 0. */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL,                   gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
                        fexpa_fns[a->esz], a->rd, a->rn, 0)

/* FTSSEL likewise has no byte form. */
static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL,                    gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
                        ftssel_fns[a->esz], a, 0)
a1f233f2 1383
516e246a
RH
/*
 *** SVE Predicate Logical Operations Group
 */

/*
 * Expand a predicate logical operation, optionally setting NZCV from
 * the result (the setflags "S" forms).  The flags depend on the
 * guarding predicate, so when Pd overlaps Pg we must preserve a copy.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        /* Non-setflags form: just the vector operation. */
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1443
/* Pd = (Pn & Pm) & Pg, 64-bit scalar form. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn & Pm) & Pg, host-vector form. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s) {
        /* Simplify degenerate non-setflags forms to a MOV or 2-op AND. */
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                /* AND p, p, p, p -> MOV. */
                return do_mov_p(s, a->rd, a->rn);
            }
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}
1481
/* Pd = (Pn &~ Pm) & Pg, 64-bit scalar form. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn &~ Pm) & Pg, host-vector form. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn) {
        /* Pg == Pn: (Pn &~ Pm) & Pn == Pn &~ Pm, a plain 2-op ANDC. */
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}
1512
/* Pd = (Pn ^ Pm) & Pg, 64-bit scalar form. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn ^ Pm) & Pg, host-vector form. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1544
/* SEL (predicates): Pd = Pg ? Pn : Pm; no setflags form exists. */
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}
1559
/* Pd = (Pn | Pm) & Pg, 64-bit scalar form. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn | Pm) & Pg, host-vector form. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        /* ORR p, p, p, p is the canonical MOV (predicate) alias. */
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1590
/* Pd = (Pn | ~Pm) & Pg, 64-bit scalar form. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn | ~Pm) & Pg, host-vector form. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}
1618
/* Pd = ~(Pn | Pm) & Pg, 64-bit scalar form. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Pd = ~(Pn | Pm) & Pg, host-vector form. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}
1646
/* Pd = ~(Pn & Pm) & Pg, 64-bit scalar form. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Pd = ~(Pn & Pm) & Pg, host-vector form. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}
1674
9e18d7a6
RH
/*
 *** SVE Predicate Misc Group
 */

/* PTEST: set NZCV from Pn under governing predicate Pg. */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Single-word predicate: do it inline. */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1705
028e2a7b
RH
/*
 * See the ARM pseudocode DecodePredCount.
 * Return the number of active elements implied by @pattern for a
 * vector of @fullsz bytes with elements of size 1 << @esz; 0 means
 * "no elements" (including the case where the VL bound is not met).
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned limit;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        /* VL1..VL8 encode the element count directly. */
        limit = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        limit = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default: /* #uimm5 */
        return 0;
    }
    /* Fixed VL patterns apply only when the vector is long enough. */
    if (elements < limit) {
        return 0;
    }
    return limit;
}
1743
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Partial final word: mask off bits past the active set. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* Whole predicate fits in one 64-bit store. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* Uniform fill: use a gvec dup when the sizes line up. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Store full words, then the partial word, then trailing zeros. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        /* N = first bit set; Z/C from whether any bit set; V = 0. */
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1823
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements.  */
TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
                        do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements.  */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
028e2a7b 1832
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };

    /* FFR access is illegal in SME streaming mode. */
    s->is_nonstreaming = true;
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR/WRFFR without predication are plain predicate moves. */
TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
028e2a7b
RH
1849
/*
 * Common expansion for PFIRST/PNEXT: the helper updates Pd in place
 * (Pd doubles as input and output) and returns the flags value, which
 * is unpacked into NZCV by do_pred_flags.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    /* Predicate ops use PREDDESC, not the simd_desc encoding. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
028e2a7b 1881
24e82e68
RH
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        /* Clamp from below at the type's minimum. */
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        /* Clamp from above at the type's maximum. */
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1910
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: saturate to 0 when reg < val. */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: saturate to all-ones when the sum wraps. */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1956
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    /*
     * The helpers only implement saturating-add (plus uqsubi for the
     * 64-bit unsigned case); subtraction is done by negating the
     * scalar operand first.
     */
    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                /* Negating is not possible for unsigned 64-bit: use uqsubi. */
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
2040
3a7be554 2041static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68 2042{
1402a6b8
RH
2043 if (!dc_isar_feature(aa64_sve, s)) {
2044 return false;
2045 }
24e82e68
RH
2046 if (sve_access_check(s)) {
2047 unsigned fullsz = vec_full_reg_size(s);
2048 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2049 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2050 }
2051 return true;
2052}
2053
3a7be554 2054static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68 2055{
1402a6b8
RH
2056 if (!dc_isar_feature(aa64_sve, s)) {
2057 return false;
2058 }
24e82e68
RH
2059 if (sve_access_check(s)) {
2060 unsigned fullsz = vec_full_reg_size(s);
2061 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2062 int inc = numelem * a->imm * (a->d ? -1 : 1);
2063 TCGv_i64 reg = cpu_reg(s, a->rd);
2064
2065 tcg_gen_addi_i64(reg, reg, inc);
2066 }
2067 return true;
2068}
2069
3a7be554 2070static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68 2071{
1402a6b8
RH
2072 if (!dc_isar_feature(aa64_sve, s)) {
2073 return false;
2074 }
24e82e68
RH
2075 if (!sve_access_check(s)) {
2076 return true;
2077 }
2078
2079 unsigned fullsz = vec_full_reg_size(s);
2080 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2081 int inc = numelem * a->imm;
2082 TCGv_i64 reg = cpu_reg(s, a->rd);
2083
2084 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
2085 if (inc == 0) {
2086 if (a->u) {
2087 tcg_gen_ext32u_i64(reg, reg);
2088 } else {
2089 tcg_gen_ext32s_i64(reg, reg);
2090 }
2091 } else {
d681f125 2092 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2093 }
2094 return true;
2095}
2096
3a7be554 2097static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68 2098{
1402a6b8
RH
2099 if (!dc_isar_feature(aa64_sve, s)) {
2100 return false;
2101 }
24e82e68
RH
2102 if (!sve_access_check(s)) {
2103 return true;
2104 }
2105
2106 unsigned fullsz = vec_full_reg_size(s);
2107 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2108 int inc = numelem * a->imm;
2109 TCGv_i64 reg = cpu_reg(s, a->rd);
2110
2111 if (inc != 0) {
d681f125 2112 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2113 }
2114 return true;
2115}
2116
3a7be554 2117static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68 2118{
1402a6b8 2119 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
24e82e68
RH
2120 return false;
2121 }
2122
2123 unsigned fullsz = vec_full_reg_size(s);
2124 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2125 int inc = numelem * a->imm;
2126
2127 if (inc != 0) {
2128 if (sve_access_check(s)) {
24e82e68
RH
2129 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
2130 vec_full_reg_offset(s, a->rn),
d681f125
RH
2131 tcg_constant_i64(a->d ? -inc : inc),
2132 fullsz, fullsz);
24e82e68
RH
2133 }
2134 } else {
2135 do_mov_z(s, a->rd, a->rn);
2136 }
2137 return true;
2138}
2139
3a7be554 2140static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68 2141{
1402a6b8 2142 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
24e82e68
RH
2143 return false;
2144 }
2145
2146 unsigned fullsz = vec_full_reg_size(s);
2147 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2148 int inc = numelem * a->imm;
2149
2150 if (inc != 0) {
2151 if (sve_access_check(s)) {
d681f125
RH
2152 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
2153 tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
2154 }
2155 } else {
2156 do_mov_z(s, a->rd, a->rn);
2157 }
2158 return true;
2159}
2160
e1fa1164
RH
2161/*
2162 *** SVE Bitwise Immediate Group
2163 */
2164
2165static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2166{
2167 uint64_t imm;
2168 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2169 extract32(a->dbm, 0, 6),
2170 extract32(a->dbm, 6, 6))) {
2171 return false;
2172 }
faf915e2 2173 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
e1fa1164
RH
2174}
2175
15a314da
RH
2176TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
2177TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
2178TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
e1fa1164 2179
3a7be554 2180static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2181{
2182 uint64_t imm;
1402a6b8
RH
2183
2184 if (!dc_isar_feature(aa64_sve, s)) {
2185 return false;
2186 }
e1fa1164
RH
2187 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2188 extract32(a->dbm, 0, 6),
2189 extract32(a->dbm, 6, 6))) {
2190 return false;
2191 }
2192 if (sve_access_check(s)) {
2193 do_dupi_z(s, a->rd, imm);
2194 }
2195 return true;
2196}
2197
f25a2361
RH
2198/*
2199 *** SVE Integer Wide Immediate - Predicated Group
2200 */
2201
2202/* Implement all merging copies. This is used for CPY (immediate),
2203 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2204 */
2205static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2206 TCGv_i64 val)
2207{
2208 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2209 static gen_cpy * const fns[4] = {
2210 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2211 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2212 };
2213 unsigned vsz = vec_full_reg_size(s);
c6a59b55 2214 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
f25a2361
RH
2215 TCGv_ptr t_zd = tcg_temp_new_ptr();
2216 TCGv_ptr t_zn = tcg_temp_new_ptr();
2217 TCGv_ptr t_pg = tcg_temp_new_ptr();
2218
2219 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2220 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2221 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2222
2223 fns[esz](t_zd, t_zn, t_pg, val, desc);
2224
2225 tcg_temp_free_ptr(t_zd);
2226 tcg_temp_free_ptr(t_zn);
2227 tcg_temp_free_ptr(t_pg);
f25a2361
RH
2228}
2229
3a7be554 2230static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361 2231{
1402a6b8 2232 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
f25a2361
RH
2233 return false;
2234 }
2235 if (sve_access_check(s)) {
2236 /* Decode the VFP immediate. */
2237 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
e152b48b 2238 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
f25a2361
RH
2239 }
2240 return true;
2241}
2242
3a7be554 2243static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 2244{
1402a6b8
RH
2245 if (!dc_isar_feature(aa64_sve, s)) {
2246 return false;
2247 }
f25a2361 2248 if (sve_access_check(s)) {
e152b48b 2249 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
f25a2361
RH
2250 }
2251 return true;
2252}
2253
3a7be554 2254static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
f25a2361
RH
2255{
2256 static gen_helper_gvec_2i * const fns[4] = {
2257 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2258 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2259 };
2260
1402a6b8
RH
2261 if (!dc_isar_feature(aa64_sve, s)) {
2262 return false;
2263 }
f25a2361
RH
2264 if (sve_access_check(s)) {
2265 unsigned vsz = vec_full_reg_size(s);
f25a2361
RH
2266 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2267 pred_full_reg_offset(s, a->pg),
e152b48b
RH
2268 tcg_constant_i64(a->imm),
2269 vsz, vsz, 0, fns[a->esz]);
f25a2361
RH
2270 }
2271 return true;
2272}
2273
b94f8f60
RH
2274/*
2275 *** SVE Permute Extract Group
2276 */
2277
75114792 2278static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
b94f8f60
RH
2279{
2280 if (!sve_access_check(s)) {
2281 return true;
2282 }
2283
2284 unsigned vsz = vec_full_reg_size(s);
75114792 2285 unsigned n_ofs = imm >= vsz ? 0 : imm;
b94f8f60 2286 unsigned n_siz = vsz - n_ofs;
75114792
SL
2287 unsigned d = vec_full_reg_offset(s, rd);
2288 unsigned n = vec_full_reg_offset(s, rn);
2289 unsigned m = vec_full_reg_offset(s, rm);
b94f8f60
RH
2290
2291 /* Use host vector move insns if we have appropriate sizes
2292 * and no unfortunate overlap.
2293 */
2294 if (m != d
2295 && n_ofs == size_for_gvec(n_ofs)
2296 && n_siz == size_for_gvec(n_siz)
2297 && (d != n || n_siz <= n_ofs)) {
2298 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2299 if (n_ofs != 0) {
2300 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2301 }
2302 } else {
2303 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2304 }
2305 return true;
2306}
2307
c799c115
RH
2308TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
2309TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
75114792 2310
30562ab7
RH
2311/*
2312 *** SVE Permute - Unpredicated Group
2313 */
2314
3a7be554 2315static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7 2316{
1402a6b8
RH
2317 if (!dc_isar_feature(aa64_sve, s)) {
2318 return false;
2319 }
30562ab7
RH
2320 if (sve_access_check(s)) {
2321 unsigned vsz = vec_full_reg_size(s);
2322 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2323 vsz, vsz, cpu_reg_sp(s, a->rn));
2324 }
2325 return true;
2326}
2327
3a7be554 2328static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
30562ab7 2329{
1402a6b8
RH
2330 if (!dc_isar_feature(aa64_sve, s)) {
2331 return false;
2332 }
30562ab7
RH
2333 if ((a->imm & 0x1f) == 0) {
2334 return false;
2335 }
2336 if (sve_access_check(s)) {
2337 unsigned vsz = vec_full_reg_size(s);
2338 unsigned dofs = vec_full_reg_offset(s, a->rd);
2339 unsigned esz, index;
2340
2341 esz = ctz32(a->imm);
2342 index = a->imm >> (esz + 1);
2343
2344 if ((index << esz) < vsz) {
2345 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2346 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2347 } else {
7e17d50e
RH
2348 /*
2349 * While dup_mem handles 128-bit elements, dup_imm does not.
2350 * Thankfully element size doesn't matter for splatting zero.
2351 */
2352 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
30562ab7
RH
2353 }
2354 }
2355 return true;
2356}
2357
2358static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2359{
2360 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2361 static gen_insr * const fns[4] = {
2362 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2363 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2364 };
2365 unsigned vsz = vec_full_reg_size(s);
c6a59b55 2366 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
30562ab7
RH
2367 TCGv_ptr t_zd = tcg_temp_new_ptr();
2368 TCGv_ptr t_zn = tcg_temp_new_ptr();
2369
2370 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2371 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2372
2373 fns[a->esz](t_zd, t_zn, val, desc);
2374
2375 tcg_temp_free_ptr(t_zd);
2376 tcg_temp_free_ptr(t_zn);
30562ab7
RH
2377}
2378
3a7be554 2379static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7 2380{
1402a6b8
RH
2381 if (!dc_isar_feature(aa64_sve, s)) {
2382 return false;
2383 }
30562ab7
RH
2384 if (sve_access_check(s)) {
2385 TCGv_i64 t = tcg_temp_new_i64();
2386 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2387 do_insr_i64(s, a, t);
2388 tcg_temp_free_i64(t);
2389 }
2390 return true;
2391}
2392
3a7be554 2393static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7 2394{
1402a6b8
RH
2395 if (!dc_isar_feature(aa64_sve, s)) {
2396 return false;
2397 }
30562ab7
RH
2398 if (sve_access_check(s)) {
2399 do_insr_i64(s, a, cpu_reg(s, a->rm));
2400 }
2401 return true;
2402}
2403
0ea3cdbf
RH
2404static gen_helper_gvec_2 * const rev_fns[4] = {
2405 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2406 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2407};
2408TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
30562ab7 2409
32e2ad65
RH
2410static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
2411 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2412 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2413};
2414TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
30562ab7 2415
5f425b92
RH
2416static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
2417 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2418 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2419};
2420TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
2421 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
80a712a2 2422
32e2ad65
RH
2423static gen_helper_gvec_3 * const tbx_fns[4] = {
2424 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2425 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2426};
2427TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
80a712a2 2428
3a7be554 2429static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2430{
2431 static gen_helper_gvec_2 * const fns[4][2] = {
2432 { NULL, NULL },
2433 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2434 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2435 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2436 };
2437
1402a6b8 2438 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
30562ab7
RH
2439 return false;
2440 }
2441 if (sve_access_check(s)) {
2442 unsigned vsz = vec_full_reg_size(s);
2443 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2444 vec_full_reg_offset(s, a->rn)
2445 + (a->h ? vsz / 2 : 0),
2446 vsz, vsz, 0, fns[a->esz][a->u]);
2447 }
2448 return true;
2449}
2450
d731d8cb
RH
2451/*
2452 *** SVE Permute - Predicates Group
2453 */
2454
2455static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2456 gen_helper_gvec_3 *fn)
2457{
2458 if (!sve_access_check(s)) {
2459 return true;
2460 }
2461
2462 unsigned vsz = pred_full_reg_size(s);
2463
d731d8cb
RH
2464 TCGv_ptr t_d = tcg_temp_new_ptr();
2465 TCGv_ptr t_n = tcg_temp_new_ptr();
2466 TCGv_ptr t_m = tcg_temp_new_ptr();
f9b0fcce 2467 uint32_t desc = 0;
d731d8cb 2468
f9b0fcce
RH
2469 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2470 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2471 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2472
2473 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2474 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2475 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
d731d8cb 2476
c6a59b55 2477 fn(t_d, t_n, t_m, tcg_constant_i32(desc));
d731d8cb
RH
2478
2479 tcg_temp_free_ptr(t_d);
2480 tcg_temp_free_ptr(t_n);
2481 tcg_temp_free_ptr(t_m);
d731d8cb
RH
2482 return true;
2483}
2484
2485static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2486 gen_helper_gvec_2 *fn)
2487{
2488 if (!sve_access_check(s)) {
2489 return true;
2490 }
2491
2492 unsigned vsz = pred_full_reg_size(s);
2493 TCGv_ptr t_d = tcg_temp_new_ptr();
2494 TCGv_ptr t_n = tcg_temp_new_ptr();
70acaafe 2495 uint32_t desc = 0;
d731d8cb
RH
2496
2497 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2498 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2499
70acaafe
RH
2500 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2501 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2502 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb 2503
c6a59b55 2504 fn(t_d, t_n, tcg_constant_i32(desc));
d731d8cb 2505
d731d8cb
RH
2506 tcg_temp_free_ptr(t_d);
2507 tcg_temp_free_ptr(t_n);
2508 return true;
2509}
2510
bdb349f5
RH
2511TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
2512TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
2513TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
2514TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
2515TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
2516TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
d731d8cb 2517
1d0fce4b
RH
2518TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
2519TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
2520TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
d731d8cb 2521
234b48e9
RH
2522/*
2523 *** SVE Permute - Interleaving Group
2524 */
2525
a95b9618
RH
2526static gen_helper_gvec_3 * const zip_fns[4] = {
2527 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2528 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2529};
2530TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2531 zip_fns[a->esz], a, 0)
2532TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2533 zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
2534
2535TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2536 gen_helper_sve2_zip_q, a, 0)
2537TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2538 gen_helper_sve2_zip_q, a,
2539 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
74b64b25 2540
234b48e9
RH
2541static gen_helper_gvec_3 * const uzp_fns[4] = {
2542 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2543 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2544};
2545
32e2ad65
RH
2546TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2547 uzp_fns[a->esz], a, 0)
2548TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2549 uzp_fns[a->esz], a, 1 << a->esz)
234b48e9 2550
32e2ad65
RH
2551TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2552 gen_helper_sve2_uzp_q, a, 0)
2553TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2554 gen_helper_sve2_uzp_q, a, 16)
74b64b25 2555
234b48e9
RH
2556static gen_helper_gvec_3 * const trn_fns[4] = {
2557 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2558 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2559};
2560
32e2ad65
RH
2561TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2562 trn_fns[a->esz], a, 0)
2563TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2564 trn_fns[a->esz], a, 1 << a->esz)
234b48e9 2565
32e2ad65
RH
2566TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2567 gen_helper_sve2_trn_q, a, 0)
2568TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2569 gen_helper_sve2_trn_q, a, 16)
74b64b25 2570
3ca879ae
RH
2571/*
2572 *** SVE Permute Vector - Predicated Group
2573 */
2574
817bd5c9
RH
2575static gen_helper_gvec_3 * const compact_fns[4] = {
2576 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2577};
ca363d23
RH
2578TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
2579 compact_fns[a->esz], a, 0)
3ca879ae 2580
ef23cb72
RH
2581/* Call the helper that computes the ARM LastActiveElement pseudocode
2582 * function, scaled by the element size. This includes the not found
2583 * indication; e.g. not found for esz=3 is -8.
2584 */
2585static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2586{
2587 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2588 * round up, as we do elsewhere, because we need the exact size.
2589 */
2590 TCGv_ptr t_p = tcg_temp_new_ptr();
2acbfbe4 2591 unsigned desc = 0;
ef23cb72 2592
2acbfbe4
RH
2593 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2594 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
ef23cb72
RH
2595
2596 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
ef23cb72 2597
c6a59b55 2598 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
ef23cb72 2599
ef23cb72
RH
2600 tcg_temp_free_ptr(t_p);
2601}
2602
2603/* Increment LAST to the offset of the next element in the vector,
2604 * wrapping around to 0.
2605 */
2606static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2607{
2608 unsigned vsz = vec_full_reg_size(s);
2609
2610 tcg_gen_addi_i32(last, last, 1 << esz);
2611 if (is_power_of_2(vsz)) {
2612 tcg_gen_andi_i32(last, last, vsz - 1);
2613 } else {
4b308bd5
RH
2614 TCGv_i32 max = tcg_constant_i32(vsz);
2615 TCGv_i32 zero = tcg_constant_i32(0);
ef23cb72 2616 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
ef23cb72
RH
2617 }
2618}
2619
2620/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2621static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2622{
2623 unsigned vsz = vec_full_reg_size(s);
2624
2625 if (is_power_of_2(vsz)) {
2626 tcg_gen_andi_i32(last, last, vsz - 1);
2627 } else {
4b308bd5
RH
2628 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
2629 TCGv_i32 zero = tcg_constant_i32(0);
ef23cb72 2630 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
ef23cb72
RH
2631 }
2632}
2633
2634/* Load an unsigned element of ESZ from BASE+OFS. */
2635static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2636{
2637 TCGv_i64 r = tcg_temp_new_i64();
2638
2639 switch (esz) {
2640 case 0:
2641 tcg_gen_ld8u_i64(r, base, ofs);
2642 break;
2643 case 1:
2644 tcg_gen_ld16u_i64(r, base, ofs);
2645 break;
2646 case 2:
2647 tcg_gen_ld32u_i64(r, base, ofs);
2648 break;
2649 case 3:
2650 tcg_gen_ld_i64(r, base, ofs);
2651 break;
2652 default:
2653 g_assert_not_reached();
2654 }
2655 return r;
2656}
2657
2658/* Load an unsigned element of ESZ from RM[LAST]. */
2659static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2660 int rm, int esz)
2661{
2662 TCGv_ptr p = tcg_temp_new_ptr();
2663 TCGv_i64 r;
2664
2665 /* Convert offset into vector into offset into ENV.
2666 * The final adjustment for the vector register base
2667 * is added via constant offset to the load.
2668 */
e03b5686 2669#if HOST_BIG_ENDIAN
ef23cb72
RH
2670 /* Adjust for element ordering. See vec_reg_offset. */
2671 if (esz < 3) {
2672 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2673 }
2674#endif
2675 tcg_gen_ext_i32_ptr(p, last);
2676 tcg_gen_add_ptr(p, p, cpu_env);
2677
2678 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2679 tcg_temp_free_ptr(p);
2680
2681 return r;
2682}
2683
2684/* Compute CLAST for a Zreg. */
2685static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2686{
2687 TCGv_i32 last;
2688 TCGLabel *over;
2689 TCGv_i64 ele;
2690 unsigned vsz, esz = a->esz;
2691
2692 if (!sve_access_check(s)) {
2693 return true;
2694 }
2695
2696 last = tcg_temp_local_new_i32();
2697 over = gen_new_label();
2698
2699 find_last_active(s, last, esz, a->pg);
2700
2701 /* There is of course no movcond for a 2048-bit vector,
2702 * so we must branch over the actual store.
2703 */
2704 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2705
2706 if (!before) {
2707 incr_last_active(s, last, esz);
2708 }
2709
2710 ele = load_last_active(s, last, a->rm, esz);
2711 tcg_temp_free_i32(last);
2712
2713 vsz = vec_full_reg_size(s);
2714 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2715 tcg_temp_free_i64(ele);
2716
2717 /* If this insn used MOVPRFX, we may need a second move. */
2718 if (a->rd != a->rn) {
2719 TCGLabel *done = gen_new_label();
2720 tcg_gen_br(done);
2721
2722 gen_set_label(over);
2723 do_mov_z(s, a->rd, a->rn);
2724
2725 gen_set_label(done);
2726 } else {
2727 gen_set_label(over);
2728 }
2729 return true;
2730}
2731
db7fa5d8
RH
2732TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
2733TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
ef23cb72
RH
2734
2735/* Compute CLAST for a scalar. */
2736static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2737 bool before, TCGv_i64 reg_val)
2738{
2739 TCGv_i32 last = tcg_temp_new_i32();
053552d3 2740 TCGv_i64 ele, cmp;
ef23cb72
RH
2741
2742 find_last_active(s, last, esz, pg);
2743
2744 /* Extend the original value of last prior to incrementing. */
2745 cmp = tcg_temp_new_i64();
2746 tcg_gen_ext_i32_i64(cmp, last);
2747
2748 if (!before) {
2749 incr_last_active(s, last, esz);
2750 }
2751
2752 /* The conceit here is that while last < 0 indicates not found, after
2753 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2754 * from which we can load garbage. We then discard the garbage with
2755 * a conditional move.
2756 */
2757 ele = load_last_active(s, last, rm, esz);
2758 tcg_temp_free_i32(last);
2759
053552d3
RH
2760 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2761 ele, reg_val);
ef23cb72 2762
ef23cb72
RH
2763 tcg_temp_free_i64(cmp);
2764 tcg_temp_free_i64(ele);
2765}
2766
2767/* Compute CLAST for a Vreg. */
2768static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2769{
2770 if (sve_access_check(s)) {
2771 int esz = a->esz;
2772 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2773 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2774
2775 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2776 write_fp_dreg(s, a->rd, reg);
2777 tcg_temp_free_i64(reg);
2778 }
2779 return true;
2780}
2781
ac4fb247
RH
2782TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
2783TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
ef23cb72
RH
2784
2785/* Compute CLAST for a Xreg. */
2786static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2787{
2788 TCGv_i64 reg;
2789
2790 if (!sve_access_check(s)) {
2791 return true;
2792 }
2793
2794 reg = cpu_reg(s, a->rd);
2795 switch (a->esz) {
2796 case 0:
2797 tcg_gen_ext8u_i64(reg, reg);
2798 break;
2799 case 1:
2800 tcg_gen_ext16u_i64(reg, reg);
2801 break;
2802 case 2:
2803 tcg_gen_ext32u_i64(reg, reg);
2804 break;
2805 case 3:
2806 break;
2807 default:
2808 g_assert_not_reached();
2809 }
2810
2811 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2812 return true;
2813}
2814
c673404a
RH
2815TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2816TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
ef23cb72
RH
2817
2818/* Compute LAST for a scalar. */
2819static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2820 int pg, int rm, bool before)
2821{
2822 TCGv_i32 last = tcg_temp_new_i32();
2823 TCGv_i64 ret;
2824
2825 find_last_active(s, last, esz, pg);
2826 if (before) {
2827 wrap_last_active(s, last, esz);
2828 } else {
2829 incr_last_active(s, last, esz);
2830 }
2831
2832 ret = load_last_active(s, last, rm, esz);
2833 tcg_temp_free_i32(last);
2834 return ret;
2835}
2836
2837/* Compute LAST for a Vreg. */
2838static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2839{
2840 if (sve_access_check(s)) {
2841 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2842 write_fp_dreg(s, a->rd, val);
2843 tcg_temp_free_i64(val);
2844 }
2845 return true;
2846}
2847
75de9fd4
RH
2848TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
2849TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
ef23cb72
RH
2850
2851/* Compute LAST for a Xreg. */
2852static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2853{
2854 if (sve_access_check(s)) {
2855 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2856 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2857 tcg_temp_free_i64(val);
2858 }
2859 return true;
2860}
2861
884c5a80
RH
2862TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
2863TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
ef23cb72 2864
3a7be554 2865static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578 2866{
1402a6b8
RH
2867 if (!dc_isar_feature(aa64_sve, s)) {
2868 return false;
2869 }
792a5578
RH
2870 if (sve_access_check(s)) {
2871 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2872 }
2873 return true;
2874}
2875
3a7be554 2876static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578 2877{
1402a6b8
RH
2878 if (!dc_isar_feature(aa64_sve, s)) {
2879 return false;
2880 }
792a5578
RH
2881 if (sve_access_check(s)) {
2882 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2883 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2884 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2885 tcg_temp_free_i64(t);
2886 }
2887 return true;
2888}
2889
817bd5c9
RH
2890static gen_helper_gvec_3 * const revb_fns[4] = {
2891 NULL, gen_helper_sve_revb_h,
2892 gen_helper_sve_revb_s, gen_helper_sve_revb_d,
2893};
2894TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
dae8fb90 2895
817bd5c9
RH
2896static gen_helper_gvec_3 * const revh_fns[4] = {
2897 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
2898};
2899TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
dae8fb90 2900
817bd5c9
RH
2901TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
2902 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
dae8fb90 2903
897ebd70
RH
2904TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
2905 gen_helper_sve_splice, a, a->esz)
b48ff240 2906
897ebd70
RH
2907TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
2908 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
75114792 2909
757f9cff
RH
2910/*
2911 *** SVE Integer Compare - Vectors Group
2912 */
2913
2914static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2915 gen_helper_gvec_flags_4 *gen_fn)
2916{
2917 TCGv_ptr pd, zn, zm, pg;
2918 unsigned vsz;
2919 TCGv_i32 t;
2920
2921 if (gen_fn == NULL) {
2922 return false;
2923 }
2924 if (!sve_access_check(s)) {
2925 return true;
2926 }
2927
2928 vsz = vec_full_reg_size(s);
392acacc 2929 t = tcg_temp_new_i32();
757f9cff
RH
2930 pd = tcg_temp_new_ptr();
2931 zn = tcg_temp_new_ptr();
2932 zm = tcg_temp_new_ptr();
2933 pg = tcg_temp_new_ptr();
2934
2935 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2936 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2937 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2938 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2939
392acacc 2940 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
757f9cff
RH
2941
2942 tcg_temp_free_ptr(pd);
2943 tcg_temp_free_ptr(zn);
2944 tcg_temp_free_ptr(zm);
2945 tcg_temp_free_ptr(pg);
2946
2947 do_pred_flags(t);
2948
2949 tcg_temp_free_i32(t);
2950 return true;
2951}
2952
2953#define DO_PPZZ(NAME, name) \
671bdb2e
RH
2954 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \
2955 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2956 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2957 }; \
2958 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \
2959 a, name##_ppzz_fns[a->esz])
757f9cff
RH
2960
2961DO_PPZZ(CMPEQ, cmpeq)
2962DO_PPZZ(CMPNE, cmpne)
2963DO_PPZZ(CMPGT, cmpgt)
2964DO_PPZZ(CMPGE, cmpge)
2965DO_PPZZ(CMPHI, cmphi)
2966DO_PPZZ(CMPHS, cmphs)
2967
2968#undef DO_PPZZ
2969
2970#define DO_PPZW(NAME, name) \
671bdb2e
RH
2971 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \
2972 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2973 gen_helper_sve_##name##_ppzw_s, NULL \
2974 }; \
2975 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \
2976 a, name##_ppzw_fns[a->esz])
757f9cff
RH
2977
2978DO_PPZW(CMPEQ, cmpeq)
2979DO_PPZW(CMPNE, cmpne)
2980DO_PPZW(CMPGT, cmpgt)
2981DO_PPZW(CMPGE, cmpge)
2982DO_PPZW(CMPHI, cmphi)
2983DO_PPZW(CMPHS, cmphs)
2984DO_PPZW(CMPLT, cmplt)
2985DO_PPZW(CMPLE, cmple)
2986DO_PPZW(CMPLO, cmplo)
2987DO_PPZW(CMPLS, cmpls)
2988
2989#undef DO_PPZW
2990
38cadeba
RH
2991/*
2992 *** SVE Integer Compare - Immediate Groups
2993 */
2994
2995static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2996 gen_helper_gvec_flags_3 *gen_fn)
2997{
2998 TCGv_ptr pd, zn, pg;
2999 unsigned vsz;
3000 TCGv_i32 t;
3001
3002 if (gen_fn == NULL) {
3003 return false;
3004 }
3005 if (!sve_access_check(s)) {
3006 return true;
3007 }
3008
3009 vsz = vec_full_reg_size(s);
392acacc 3010 t = tcg_temp_new_i32();
38cadeba
RH
3011 pd = tcg_temp_new_ptr();
3012 zn = tcg_temp_new_ptr();
3013 pg = tcg_temp_new_ptr();
3014
3015 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
3016 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
3017 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3018
392acacc 3019 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
38cadeba
RH
3020
3021 tcg_temp_free_ptr(pd);
3022 tcg_temp_free_ptr(zn);
3023 tcg_temp_free_ptr(pg);
3024
3025 do_pred_flags(t);
3026
3027 tcg_temp_free_i32(t);
3028 return true;
3029}
3030
/*
 * Expand the integer compare-with-immediate group: one static table of
 * flag-setting helpers per element size (b/h/s/d), wired to
 * do_ppzi_flags via TRANS_FEAT.
 */
#define DO_PPZI(NAME, name) \
    static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = {       \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a,                 \
               name##_ppzi_fns[a->esz])

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
3051
35da316f
RH
3052/*
3053 *** SVE Partition Break Group
3054 */
3055
3056static bool do_brk3(DisasContext *s, arg_rprr_s *a,
3057 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
3058{
3059 if (!sve_access_check(s)) {
3060 return true;
3061 }
3062
3063 unsigned vsz = pred_full_reg_size(s);
3064
3065 /* Predicate sizes may be smaller and cannot use simd_desc. */
3066 TCGv_ptr d = tcg_temp_new_ptr();
3067 TCGv_ptr n = tcg_temp_new_ptr();
3068 TCGv_ptr m = tcg_temp_new_ptr();
3069 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 3070 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3071
3072 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3073 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3074 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
3075 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3076
3077 if (a->s) {
93418f1c
RH
3078 TCGv_i32 t = tcg_temp_new_i32();
3079 fn_s(t, d, n, m, g, desc);
35da316f 3080 do_pred_flags(t);
93418f1c 3081 tcg_temp_free_i32(t);
35da316f 3082 } else {
93418f1c 3083 fn(d, n, m, g, desc);
35da316f
RH
3084 }
3085 tcg_temp_free_ptr(d);
3086 tcg_temp_free_ptr(n);
3087 tcg_temp_free_ptr(m);
3088 tcg_temp_free_ptr(g);
35da316f
RH
3089 return true;
3090}
3091
3092static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3093 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3094{
3095 if (!sve_access_check(s)) {
3096 return true;
3097 }
3098
3099 unsigned vsz = pred_full_reg_size(s);
3100
3101 /* Predicate sizes may be smaller and cannot use simd_desc. */
3102 TCGv_ptr d = tcg_temp_new_ptr();
3103 TCGv_ptr n = tcg_temp_new_ptr();
3104 TCGv_ptr g = tcg_temp_new_ptr();
93418f1c 3105 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
3106
3107 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3108 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3109 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3110
3111 if (a->s) {
93418f1c
RH
3112 TCGv_i32 t = tcg_temp_new_i32();
3113 fn_s(t, d, n, g, desc);
35da316f 3114 do_pred_flags(t);
93418f1c 3115 tcg_temp_free_i32(t);
35da316f 3116 } else {
93418f1c 3117 fn(d, n, g, desc);
35da316f
RH
3118 }
3119 tcg_temp_free_ptr(d);
3120 tcg_temp_free_ptr(n);
3121 tcg_temp_free_ptr(g);
35da316f
RH
3122 return true;
3123}
3124
2224d24d
RH
3125TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
3126 gen_helper_sve_brkpa, gen_helper_sve_brkpas)
3127TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
3128 gen_helper_sve_brkpb, gen_helper_sve_brkpbs)
3129
3130TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
3131 gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
3132TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
3133 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)
3134
3135TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
3136 gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
3137TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
3138 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)
3139
3140TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
3141 gen_helper_sve_brkn, gen_helper_sve_brkns)
35da316f 3142
9ee3a611
RH
3143/*
3144 *** SVE Predicate Count Group
3145 */
3146
/*
 * Compute into VAL the number of active elements of size ESZ in
 * predicate PN governed by predicate PG.  When PN == PG the AND of
 * the two predicates is skipped.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        /* The whole predicate fits in a single i64: popcount inline. */
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        /* Larger predicates: count out of line in the helper. */
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        /* Predicate sizes cannot use simd_desc; pack PREDDESC fields. */
        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));

        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}
3185
3a7be554 3186static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611 3187{
1402a6b8
RH
3188 if (!dc_isar_feature(aa64_sve, s)) {
3189 return false;
3190 }
9ee3a611
RH
3191 if (sve_access_check(s)) {
3192 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3193 }
3194 return true;
3195}
3196
3a7be554 3197static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611 3198{
1402a6b8
RH
3199 if (!dc_isar_feature(aa64_sve, s)) {
3200 return false;
3201 }
9ee3a611
RH
3202 if (sve_access_check(s)) {
3203 TCGv_i64 reg = cpu_reg(s, a->rd);
3204 TCGv_i64 val = tcg_temp_new_i64();
3205
3206 do_cntp(s, val, a->esz, a->pg, a->pg);
3207 if (a->d) {
3208 tcg_gen_sub_i64(reg, reg, val);
3209 } else {
3210 tcg_gen_add_i64(reg, reg, val);
3211 }
3212 tcg_temp_free_i64(val);
3213 }
3214 return true;
3215}
3216
3a7be554 3217static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611 3218{
1402a6b8 3219 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
9ee3a611
RH
3220 return false;
3221 }
3222 if (sve_access_check(s)) {
3223 unsigned vsz = vec_full_reg_size(s);
3224 TCGv_i64 val = tcg_temp_new_i64();
3225 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3226
3227 do_cntp(s, val, a->esz, a->pg, a->pg);
3228 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3229 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3230 }
3231 return true;
3232}
3233
3a7be554 3234static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611 3235{
1402a6b8
RH
3236 if (!dc_isar_feature(aa64_sve, s)) {
3237 return false;
3238 }
9ee3a611
RH
3239 if (sve_access_check(s)) {
3240 TCGv_i64 reg = cpu_reg(s, a->rd);
3241 TCGv_i64 val = tcg_temp_new_i64();
3242
3243 do_cntp(s, val, a->esz, a->pg, a->pg);
3244 do_sat_addsub_32(reg, val, a->u, a->d);
3245 }
3246 return true;
3247}
3248
3a7be554 3249static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611 3250{
1402a6b8
RH
3251 if (!dc_isar_feature(aa64_sve, s)) {
3252 return false;
3253 }
9ee3a611
RH
3254 if (sve_access_check(s)) {
3255 TCGv_i64 reg = cpu_reg(s, a->rd);
3256 TCGv_i64 val = tcg_temp_new_i64();
3257
3258 do_cntp(s, val, a->esz, a->pg, a->pg);
3259 do_sat_addsub_64(reg, val, a->u, a->d);
3260 }
3261 return true;
3262}
3263
3a7be554 3264static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611 3265{
1402a6b8 3266 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
9ee3a611
RH
3267 return false;
3268 }
3269 if (sve_access_check(s)) {
3270 TCGv_i64 val = tcg_temp_new_i64();
3271 do_cntp(s, val, a->esz, a->pg, a->pg);
3272 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3273 }
3274 return true;
3275}
3276
caf1cefc
RH
3277/*
3278 *** SVE Integer Compare Scalars Group
3279 */
3280
/*
 * CTERMEQ/CTERMNE -- compare and terminate loop.
 * Writes NF from the comparison result and derives VF = !NF & !CF;
 * CF itself is left unchanged.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* cmp is 0/1; narrow it into NF. */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  Negating the 0/1 value
     * turns "1" into all-ones so that bit 31 is set. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3308
/*
 * WHILE (scalar pair -> predicate): WHILELT/LE/LO/LS and the SVE2
 * WHILEGT/GE/HI/HS forms.  The condition is compressed into a count
 * of iterations for which it holds, which the helper expands into a
 * predicate and a flags result.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (a->lt
        ? !dc_isar_feature(aa64_sve, s)
        : !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit operands: extend per the signedness of the compare. */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        /* Incrementing loop: distance from op0 up to op1. */
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        /* Decrementing loop: distance from op0 down to op1. */
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length. This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    /* Predicate sizes cannot use simd_desc; pack PREDDESC fields. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
3421
14f6dad1
RH
3422static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3423{
3424 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3425 TCGv_i32 t2;
14f6dad1
RH
3426 TCGv_ptr ptr;
3427 unsigned vsz = vec_full_reg_size(s);
3428 unsigned desc = 0;
3429
3430 if (!dc_isar_feature(aa64_sve2, s)) {
3431 return false;
3432 }
3433 if (!sve_access_check(s)) {
3434 return true;
3435 }
3436
3437 op0 = read_cpu_reg(s, a->rn, 1);
3438 op1 = read_cpu_reg(s, a->rm, 1);
3439
4481bbf2 3440 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3441 diff = tcg_temp_new_i64();
3442
3443 if (a->rw) {
3444 /* WHILERW */
3445 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3446 t1 = tcg_temp_new_i64();
3447 tcg_gen_sub_i64(diff, op0, op1);
3448 tcg_gen_sub_i64(t1, op1, op0);
3449 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3450 tcg_temp_free_i64(t1);
3451 /* Round down to a multiple of ESIZE. */
3452 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3453 /* If op1 == op0, diff == 0, and the condition is always true. */
3454 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3455 } else {
3456 /* WHILEWR */
3457 tcg_gen_sub_i64(diff, op1, op0);
3458 /* Round down to a multiple of ESIZE. */
3459 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3460 /* If op0 >= op1, diff <= 0, the condition is always true. */
3461 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3462 }
3463
3464 /* Bound to the maximum. */
3465 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3466
3467 /* Since we're bounded, pass as a 32-bit type. */
3468 t2 = tcg_temp_new_i32();
3469 tcg_gen_extrl_i64_i32(t2, diff);
3470 tcg_temp_free_i64(diff);
3471
3472 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3473 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3474
3475 ptr = tcg_temp_new_ptr();
3476 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3477
4481bbf2 3478 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3479 do_pred_flags(t2);
3480
3481 tcg_temp_free_ptr(ptr);
3482 tcg_temp_free_i32(t2);
14f6dad1
RH
3483 return true;
3484}
3485
ed491961
RH
3486/*
3487 *** SVE Integer Wide Immediate - Unpredicated Group
3488 */
3489
3a7be554 3490static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961 3491{
1402a6b8 3492 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
ed491961
RH
3493 return false;
3494 }
3495 if (sve_access_check(s)) {
3496 unsigned vsz = vec_full_reg_size(s);
3497 int dofs = vec_full_reg_offset(s, a->rd);
3498 uint64_t imm;
3499
3500 /* Decode the VFP immediate. */
3501 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3502 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3503 }
3504 return true;
3505}
3506
3a7be554 3507static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3508{
1402a6b8
RH
3509 if (!dc_isar_feature(aa64_sve, s)) {
3510 return false;
3511 }
ed491961
RH
3512 if (sve_access_check(s)) {
3513 unsigned vsz = vec_full_reg_size(s);
3514 int dofs = vec_full_reg_offset(s, a->rd);
8711e71f 3515 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3516 }
3517 return true;
3518}
3519
48ca613d 3520TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)
6e6a157d 3521
/*
 * SUB (vector, immediate) -- implemented by negating the decoded
 * immediate in place and reusing the ADD expansion.
 */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3527
/*
 * SUBR (vector, immediate) -- reversed subtract, rd[i] = imm - rn[i].
 * Expanded with a per-element-size GVecGen2s table; scalar_first
 * places the immediate as the first operand of the subtraction.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}
3570
fa4bd72c 3571TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
6e6a157d 3572
3a7be554 3573static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3574{
6e6a157d 3575 if (sve_access_check(s)) {
138a1f7b
RH
3576 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3577 tcg_constant_i64(a->imm), u, d);
6e6a157d
RH
3578 }
3579 return true;
3580}
3581
17b54d1c
RH
3582TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
3583TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
3584TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
3585TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
6e6a157d
RH
3586
3587static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3588{
3589 if (sve_access_check(s)) {
3590 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3591 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3592 vec_full_reg_offset(s, a->rn),
138a1f7b 3593 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
6e6a157d
RH
3594 }
3595 return true;
3596}
3597
3598#define DO_ZZI(NAME, name) \
ef4a3958 3599 static gen_helper_gvec_2i * const name##i_fns[4] = { \
6e6a157d
RH
3600 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3601 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3602 }; \
ef4a3958 3603 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])
6e6a157d
RH
3604
3605DO_ZZI(SMAX, smax)
3606DO_ZZI(UMAX, umax)
3607DO_ZZI(SMIN, smin)
3608DO_ZZI(UMIN, umin)
3609
3610#undef DO_ZZI
3611
5f425b92
RH
3612static gen_helper_gvec_4 * const dot_fns[2][2] = {
3613 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3614 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3615};
3616TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
3617 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
d730ecaa 3618
814d4c52
RH
3619/*
3620 * SVE Multiply - Indexed
3621 */
3622
f3500a25
RH
3623TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3624 gen_helper_gvec_sdot_idx_b, a)
3625TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3626 gen_helper_gvec_sdot_idx_h, a)
3627TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3628 gen_helper_gvec_udot_idx_b, a)
3629TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3630 gen_helper_gvec_udot_idx_h, a)
3631
3632TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3633 gen_helper_gvec_sudot_idx_b, a)
3634TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3635 gen_helper_gvec_usdot_idx_b, a)
16fcfdc7 3636
814d4c52 3637#define DO_SVE2_RRX(NAME, FUNC) \
af031f64
RH
3638 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3639 a->rd, a->rn, a->rm, a->index)
814d4c52 3640
af031f64
RH
3641DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
3642DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
3643DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)
814d4c52 3644
af031f64
RH
3645DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
3646DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
3647DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
1aee2d70 3648
af031f64
RH
3649DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
3650DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
3651DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
1aee2d70 3652
814d4c52
RH
3653#undef DO_SVE2_RRX
3654
b95f5eeb 3655#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
af031f64
RH
3656 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3657 a->rd, a->rn, a->rm, (a->index << 1) | TOP)
3658
3659DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
3660DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
3661DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
3662DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
3663
3664DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
3665DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
3666DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
3667DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
3668
3669DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
3670DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
3671DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
3672DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
d3949c4c 3673
b95f5eeb
RH
3674#undef DO_SVE2_RRX_TB
3675
8a02aac7 3676#define DO_SVE2_RRXR(NAME, FUNC) \
8681eb76 3677 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)
8a02aac7 3678
8681eb76
RH
3679DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
3680DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
3681DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
8a02aac7 3682
8681eb76
RH
3683DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
3684DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
3685DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
8a02aac7 3686
8681eb76
RH
3687DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
3688DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
3689DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
75d6d5fc 3690
8681eb76
RH
3691DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
3692DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
3693DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
75d6d5fc 3694
8a02aac7
RH
3695#undef DO_SVE2_RRXR
3696
c5c455d7 3697#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
8681eb76
RH
3698 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
3699 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)
3700
3701DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
3702DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
3703DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
3704DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
3705
3706DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
3707DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
3708DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
3709DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
3710
3711DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
3712DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
3713DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
3714DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
3715
3716DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
3717DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
3718DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
3719DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
3720
3721DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
3722DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
3723DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
3724DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
3725
3726DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
3727DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
3728DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
3729DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
c5c455d7
RH
3730
3731#undef DO_SVE2_RRXR_TB
3732
3b787ed8 3733#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
8681eb76
RH
3734 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
3735 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)
3b787ed8
RH
3736
3737DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
3738DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)
3739
3740DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
3741DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)
3742
21068f39
RH
3743DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
3744DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
3745
3b787ed8
RH
3746#undef DO_SVE2_RRXR_ROT
3747
ca40a6e6
RH
3748/*
3749 *** SVE Floating Point Multiply-Add Indexed Group
3750 */
3751
/*
 * FMLA/FMLS (indexed): sub selects the subtracting form.
 * The element index and the sub flag are packed into simd_data.
 */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[4] = {
        NULL,                           /* no byte-sized form */
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };
    return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sub,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}
3764
3b879c28
RH
3765TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
3766TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
0a82d963 3767
ca40a6e6
RH
3768/*
3769 *** SVE Floating Point Multiply Indexed Group
3770 */
3771
9c99ef66
RH
3772static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
3773 NULL, gen_helper_gvec_fmul_idx_h,
3774 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
3775};
3776TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
3777 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
3778 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
ca40a6e6 3779
23fbe79f
RH
3780/*
3781 *** SVE Floating Point Fast Reduction Group
3782 */
3783
3784typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3785 TCGv_ptr, TCGv_i32);
3786
/*
 * Expand an SVE floating-point horizontal reduction (FADDV etc.).
 * The scalar result is written to FP register rd.  The simd_data
 * field carries pow2ceil(vsz) -- presumably sized for the helper's
 * reduction scheme; confirm against the helper in sve_helper.c.
 */
static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz, p2vsz;
    TCGv_i32 t_desc;
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    p2vsz = pow2ceil(vsz);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    /* Half-precision uses the FZ16-aware FP status. */
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
3822
3823#define DO_VPZ(NAME, name) \
8003e7cf
RH
3824 static gen_helper_fp_reduce * const name##_fns[4] = { \
3825 NULL, gen_helper_sve_##name##_h, \
3826 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
23fbe79f 3827 }; \
8003e7cf 3828 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
23fbe79f
RH
3829
3830DO_VPZ(FADDV, faddv)
3831DO_VPZ(FMINNMV, fminnmv)
3832DO_VPZ(FMAXNMV, fmaxnmv)
3833DO_VPZ(FMINV, fminv)
3834DO_VPZ(FMAXV, fmaxv)
3835
8003e7cf
RH
3836#undef DO_VPZ
3837
3887c038
RH
3838/*
3839 *** SVE Floating Point Unary Operations - Unpredicated Group
3840 */
3841
de58c6b0
RH
3842static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
3843 NULL, gen_helper_gvec_frecpe_h,
3844 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
3845};
3846TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
3887c038 3847
de58c6b0
RH
3848static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
3849 NULL, gen_helper_gvec_frsqrte_h,
3850 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
3851};
3852TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
3887c038 3853
4d2e2a03
RH
3854/*
3855 *** SVE Floating Point Compare with Zero Group
3856 */
3857
63d6aef8 3858static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4d2e2a03
RH
3859 gen_helper_gvec_3_ptr *fn)
3860{
63d6aef8
RH
3861 if (fn == NULL) {
3862 return false;
3863 }
3864 if (sve_access_check(s)) {
3865 unsigned vsz = vec_full_reg_size(s);
3866 TCGv_ptr status =
3867 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03 3868
63d6aef8
RH
3869 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3870 vec_full_reg_offset(s, a->rn),
3871 pred_full_reg_offset(s, a->pg),
3872 status, vsz, vsz, 0, fn);
3873 tcg_temp_free_ptr(status);
3874 }
3875 return true;
4d2e2a03
RH
3876}
3877
3878#define DO_PPZ(NAME, name) \
63d6aef8
RH
3879 static gen_helper_gvec_3_ptr * const name##_fns[] = { \
3880 NULL, gen_helper_sve_##name##_h, \
3881 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
4d2e2a03 3882 }; \
63d6aef8 3883 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])
4d2e2a03
RH
3884
3885DO_PPZ(FCMGE_ppz0, fcmge0)
3886DO_PPZ(FCMGT_ppz0, fcmgt0)
3887DO_PPZ(FCMLE_ppz0, fcmle0)
3888DO_PPZ(FCMLT_ppz0, fcmlt0)
3889DO_PPZ(FCMEQ_ppz0, fcmeq0)
3890DO_PPZ(FCMNE_ppz0, fcmne0)
3891
3892#undef DO_PPZ
3893
67fcd9ad
RH
3894/*
3895 *** SVE floating-point trig multiply-add coefficient
3896 */
3897
cdd85923
RH
3898static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
3899 NULL, gen_helper_sve_ftmad_h,
3900 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
3901};
7272e98a
RH
3902TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
3903 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
3904 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
67fcd9ad 3905
7f9ddf64
RH
3906/*
3907 *** SVE Floating Point Accumulating Reduction Group
3908 */
3909
/*
 * FADDA -- floating-point accumulating reduction.  The initial
 * accumulator is element 0 of zn; the helper folds in the active
 * elements of zm and the scalar result is written to FP register rd.
 * Byte elements are not encodable; marked non-streaming.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    /* Load the initial accumulator from element 0 of zn. */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    /* fns[] is indexed h/s/d, hence esz - 1. */
    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
3950
29b80469
RH
3951/*
3952 *** SVE Floating Point Arithmetic - Unpredicated Group
3953 */
3954
29b80469 3955#define DO_FP3(NAME, name) \
bdd4ce0d 3956 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
29b80469
RH
3957 NULL, gen_helper_gvec_##name##_h, \
3958 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3959 }; \
bdd4ce0d 3960 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
29b80469
RH
3961
3962DO_FP3(FADD_zzz, fadd)
3963DO_FP3(FSUB_zzz, fsub)
3964DO_FP3(FMUL_zzz, fmul)
29b80469
RH
3965DO_FP3(FRECPS, recps)
3966DO_FP3(FRSQRTS, rsqrts)
3967
3968#undef DO_FP3
3969
7272e98a
RH
3970static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
3971 NULL, gen_helper_gvec_ftsmul_h,
3972 gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
3973};
3974TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
3975 ftsmul_fns[a->esz], a, 0)
3976
ec3b87c2
RH
3977/*
3978 *** SVE Floating Point Arithmetic - Predicated Group
3979 */
3980
7de2617b
RH
3981#define DO_ZPZZ_FP(NAME, FEAT, name) \
3982 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
3983 NULL, gen_helper_##name##_h, \
3984 gen_helper_##name##_s, gen_helper_##name##_d \
3985 }; \
3986 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
3987
3988DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
3989DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
3990DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
3991DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
3992DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
3993DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
3994DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
3995DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
3996DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
3997DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
3998DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
8092c6a3 3999
cc48affe
RH
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/*
 * Expand a predicated Zd = Zn <op> scalar operation via the given
 * out-of-line helper.  The caller has already done sve_access_check
 * (see do_fp_imm, which calls this inside the check).
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    /* Pointers into env for the destination, source, and predicate. */
    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
4026
413ee8e4 4027static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
cc48affe
RH
4028 gen_helper_sve_fp2scalar *fn)
4029{
413ee8e4
RH
4030 if (fn == NULL) {
4031 return false;
4032 }
4033 if (sve_access_check(s)) {
4034 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4035 tcg_constant_i64(imm), fn);
4036 }
4037 return true;
cc48affe
RH
4038}
4039
98c37459
RH
/*
 * FP arithmetic with an immediate: a->imm (0 or 1) selects between the
 * two architecturally defined constants const0/const1 for each element
 * size.  The esz == 0 (byte) row is { -1, -1 } and its helper slot is
 * NULL, so byte encodings fail via the NULL check in do_fp_imm.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
    static gen_helper_sve_fp2scalar * const name##_fns[4] = {   \
        NULL, gen_helper_sve_##name##_h,                        \
        gen_helper_sve_##name##_s,                              \
        gen_helper_sve_##name##_d                               \
    };                                                          \
    static uint64_t const name##_const[4][2] = {                \
        { -1, -1 },                                             \
        { float16_##const0, float16_##const1 },                 \
        { float32_##const0, float32_##const1 },                 \
        { float64_##const0, float64_##const1 },                 \
    };                                                          \
    TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a,             \
               name##_const[a->esz][a->imm], name##_fns[a->esz])

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
4065
abfdefd5
RH
4066static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4067 gen_helper_gvec_4_ptr *fn)
4068{
4069 if (fn == NULL) {
4070 return false;
4071 }
4072 if (sve_access_check(s)) {
4073 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4074 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
4075 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4076 vec_full_reg_offset(s, a->rn),
4077 vec_full_reg_offset(s, a->rm),
4078 pred_full_reg_offset(s, a->pg),
4079 status, vsz, vsz, 0, fn);
4080 tcg_temp_free_ptr(status);
4081 }
4082 return true;
4083}
4084
/*
 * Predicated FP comparisons; the NULL slot rejects byte elements,
 * which have no FP comparison encoding.
 */
#define DO_FPCMP(NAME, name) \
    static gen_helper_gvec_4_ptr * const name##_fns[4] = {      \
        NULL, gen_helper_sve_##name##_h,                        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d    \
    };                                                          \
    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
4101
6f5cd670
RH
/* FCADD: complex add with rotation a->rot; no byte-element helper. */
static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
    NULL, gen_helper_sve_fcadd_h,
    gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
};
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
           a->rd, a->rn, a->rm, a->pg, a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
76a9d9cd 4109
/*
 * Predicated fused multiply-add family (Zd = Za +/- Zn * Zm variants),
 * expanded via gen_gvec_fpst_zzzzp with a zero "rot" argument.
 */
#define DO_FMLA(NAME, name) \
    static gen_helper_gvec_5_ptr * const name##_fns[4] = {      \
        NULL, gen_helper_sve_##name##_h,                        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d    \
    };                                                          \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
               a->rd, a->rn, a->rm, a->ra, a->pg, 0,            \
               a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4125
498be5b8
RH
4126static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
4127 NULL, gen_helper_sve_fcmla_zpzzz_h,
4128 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
4129};
4130TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
4131 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
4132 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
05f48bab 4133
e600d649
RH
4134static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
4135 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
4136};
4137TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
4138 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
4139 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
18fc2405 4140
8092c6a3
RH
4141/*
4142 *** SVE Floating Point Unary Operations Predicated Group
4143 */
4144
0360730c
RH
4145TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4146 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
4147TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4148 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)
4149
4150TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
4151 gen_helper_sve_bfcvt, a, 0, FPST_FPCR)
4152
4153TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4154 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
4155TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4156 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
4157TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4158 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
4159TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4160 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)
4161
4162TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4163 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
4164TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4165 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
4166TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4167 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
4168TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4169 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
4170TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4171 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
4172TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4173 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)
4174
4175TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4176 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
4177TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4178 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
4179TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4180 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
4181TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4182 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
4183TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4184 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
4185TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4186 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)
4187
4188TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4189 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
4190TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4191 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
df4de1af 4192
ed6bb6b4
RH
4193static gen_helper_gvec_3_ptr * const frint_fns[] = {
4194 NULL,
cda3c753
RH
4195 gen_helper_sve_frint_h,
4196 gen_helper_sve_frint_s,
4197 gen_helper_sve_frint_d
4198};
0360730c
RH
4199TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
4200 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
cda3c753 4201
0360730c
RH
4202static gen_helper_gvec_3_ptr * const frintx_fns[] = {
4203 NULL,
4204 gen_helper_sve_frintx_h,
4205 gen_helper_sve_frintx_s,
4206 gen_helper_sve_frintx_d
4207};
4208TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
4209 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753 4210
95365277
SL
/*
 * Expand an FP round-to-integral with an explicit rounding mode,
 * temporarily overriding the FPCR rounding mode around the operation.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz;
    TCGv_i32 tmode;
    TCGv_ptr status;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /*
     * tmode must be a mutable temp (not tcg_constant_i32): set_rmode
     * writes the previous rounding mode back into its destination,
     * which is what lets the second call below restore it.
     */
    tmode = tcg_const_i32(mode);
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    /* Swap in the requested rounding mode, saving the old one. */
    gen_helper_set_rmode(tmode, tmode, status);

    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);

    /* Restore the original rounding mode. */
    gen_helper_set_rmode(tmode, tmode, status);
    tcg_temp_free_i32(tmode);
    tcg_temp_free_ptr(status);
    return true;
}
4241
27645836
RH
4242TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
4243 float_round_nearest_even, frint_fns[a->esz])
4244TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
4245 float_round_up, frint_fns[a->esz])
4246TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
4247 float_round_down, frint_fns[a->esz])
4248TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
4249 float_round_to_zero, frint_fns[a->esz])
4250TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
4251 float_round_ties_away, frint_fns[a->esz])
cda3c753 4252
0360730c
RH
4253static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
4254 NULL, gen_helper_sve_frecpx_h,
4255 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
4256};
4257TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
4258 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
8092c6a3 4259
0360730c
RH
4260static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
4261 NULL, gen_helper_sve_fsqrt_h,
4262 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
4263};
4264TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
4265 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4266
4267TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4268 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
4269TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4270 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
4271TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4272 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)
4273
4274TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4275 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
4276TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4277 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)
4278
4279TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4280 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
4281TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4282 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)
4283
4284TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4285 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
4286TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4287 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
4288TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4289 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)
4290
4291TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4292 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
4293TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4294 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
4295TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4296 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)
4297
4298TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4299 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
8092c6a3 4300
d1822297
RH
4301/*
4302 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4303 */
4304
4305/* Subroutine loading a vector register at VOFS of LEN bytes.
4306 * The load should begin at the address Rn + IMM.
4307 */
4308
8713f73e
RH
void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
                 int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;   /* 0, 2, 4 or 6 for predicate sizes */
    int nparts = len / 8 + ctpop8(len_remain);  /* total memory ops */
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    /* MTE-check the entire transfer up front, before any access. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Few enough 8-byte parts: fully unroll the loads. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_st_i64(t0, base, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Otherwise emit a TCG-level loop over the aligned portion. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        /* A non-env base must likewise be copied to a loop-local temp. */
        if (base != cpu_env) {
            TCGv_ptr b = tcg_temp_local_new_ptr();
            tcg_gen_mov_ptr(b, base);
            base = b;
        }

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, base, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);

        if (base != cpu_env) {
            tcg_temp_free_ptr(base);
            /* Callers passing a non-env base use 8-byte multiples only. */
            assert(len_remain == 0);
        }
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32 maps remainder 2/4/8 to MO_16/MO_32/MO_64. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* Six bytes: a 4-byte load and a 2-byte load, merged. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, base, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
4407
5047c204 4408/* Similarly for stores. */
8713f73e
RH
void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
                 int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;   /* 0, 2, 4 or 6 for predicate sizes */
    int nparts = len / 8 + ctpop8(len_remain);  /* total memory ops */
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    /* MTE-check the entire transfer up front, before any access. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities. There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Few enough 8-byte parts: fully unroll the stores. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, base, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Otherwise emit a TCG-level loop over the aligned portion. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        /* A non-env base must likewise be copied to a loop-local temp. */
        if (base != cpu_env) {
            TCGv_ptr b = tcg_temp_local_new_ptr();
            tcg_gen_mov_ptr(b, base);
            base = b;
        }

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, base, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);

        if (base != cpu_env) {
            tcg_temp_free_ptr(base);
            /* Callers passing a non-env base use 8-byte multiples only. */
            assert(len_remain == 0);
        }
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, base, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32 maps remainder 2/4/8 to MO_16/MO_32/MO_64. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* Six bytes: a 4-byte store then the high 2 bytes. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
4504
3a7be554 4505static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297 4506{
1402a6b8
RH
4507 if (!dc_isar_feature(aa64_sve, s)) {
4508 return false;
4509 }
d1822297
RH
4510 if (sve_access_check(s)) {
4511 int size = vec_full_reg_size(s);
4512 int off = vec_full_reg_offset(s, a->rd);
8713f73e 4513 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
d1822297
RH
4514 }
4515 return true;
4516}
4517
3a7be554 4518static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297 4519{
1402a6b8
RH
4520 if (!dc_isar_feature(aa64_sve, s)) {
4521 return false;
4522 }
d1822297
RH
4523 if (sve_access_check(s)) {
4524 int size = pred_full_reg_size(s);
4525 int off = pred_full_reg_offset(s, a->rd);
8713f73e 4526 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
d1822297
RH
4527 }
4528 return true;
4529}
c4e7c493 4530
3a7be554 4531static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204 4532{
1402a6b8
RH
4533 if (!dc_isar_feature(aa64_sve, s)) {
4534 return false;
4535 }
5047c204
RH
4536 if (sve_access_check(s)) {
4537 int size = vec_full_reg_size(s);
4538 int off = vec_full_reg_offset(s, a->rd);
8713f73e 4539 gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
5047c204
RH
4540 }
4541 return true;
4542}
4543
3a7be554 4544static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204 4545{
1402a6b8
RH
4546 if (!dc_isar_feature(aa64_sve, s)) {
4547 return false;
4548 }
5047c204
RH
4549 if (sve_access_check(s)) {
4550 int size = pred_full_reg_size(s);
4551 int off = pred_full_reg_offset(s, a->rd);
8713f73e 4552 gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
5047c204
RH
4553 }
4554 return true;
4555}
4556
c4e7c493
RH
4557/*
4558 *** SVE Memory - Contiguous Load Group
4559 */
4560
/*
 * The memory mode of the dtype.
 * Indexed by the 4-bit dtype field of the contiguous load encodings
 * (a->dtype); the MemOp gives both memory access size and sign
 * extension for the memory element.
 */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

/* Extract just the access size (log2 bytes) from the dtype. */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype (log2 bytes, may exceed dtype_msz). */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4578
/*
 * Expand a predicated contiguous load/store via an out-of-line helper.
 * MTE_N is the number of registers transferred (for the MTE length);
 * IS_WRITE distinguishes stores for the MTE descriptor.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        /* Pack the MTE check parameters above SVE_MTEDESC_SHIFT. */
        int msz = dtype_msz(dtype);

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        /* No MTE: strip the tag byte inline instead. */
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_pg);
}
4613
c182c6db
RH
/*
 * Indexed by [mte][be][dtype][nreg].
 * NULL entries are dtype/nreg combinations with no instruction
 * encoding (multi-register forms exist only when the memory size
 * equals the element size); see the assert in do_ld_zpa.
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
4732
c4e7c493
RH
/*
 * Expand a contiguous LD[1-4] by selecting the proper helper from
 * ldr_fns for the current MTE/endianness state and dtype/nreg.
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];

    /*
     * While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
}
4746
3a7be554 4747static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493 4748{
1402a6b8 4749 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
c4e7c493
RH
4750 return false;
4751 }
4752 if (sve_access_check(s)) {
4753 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4754 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4755 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4756 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4757 }
4758 return true;
4759}
4760
3a7be554 4761static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493 4762{
1402a6b8
RH
4763 if (!dc_isar_feature(aa64_sve, s)) {
4764 return false;
4765 }
c4e7c493
RH
4766 if (sve_access_check(s)) {
4767 int vsz = vec_full_reg_size(s);
4768 int elements = vsz >> dtype_esz[a->dtype];
4769 TCGv_i64 addr = new_tmp_a64(s);
4770
4771 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4772 (a->imm * elements * (a->nreg + 1))
4773 << dtype_msz(a->dtype));
4774 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4775 }
4776 return true;
4777}
e2654d75 4778
/*
 * LDFF1: first-fault contiguous load, scalar-plus-scalar addressing.
 * Helper table indexed by [mte][be][dtype]; only single-register
 * forms exist, so there is no nreg dimension.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* First-fault loads are not available in streaming SVE mode. */
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        /* addr = Rn + (Rm << msz) */
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4880
3a7be554 4881static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 4882{
aa13f7c3
RH
4883 static gen_helper_gvec_mem * const fns[2][2][16] = {
4884 { /* mte inactive, little-endian */
4885 { gen_helper_sve_ldnf1bb_r,
4886 gen_helper_sve_ldnf1bhu_r,
4887 gen_helper_sve_ldnf1bsu_r,
4888 gen_helper_sve_ldnf1bdu_r,
4889
4890 gen_helper_sve_ldnf1sds_le_r,
4891 gen_helper_sve_ldnf1hh_le_r,
4892 gen_helper_sve_ldnf1hsu_le_r,
4893 gen_helper_sve_ldnf1hdu_le_r,
4894
4895 gen_helper_sve_ldnf1hds_le_r,
4896 gen_helper_sve_ldnf1hss_le_r,
4897 gen_helper_sve_ldnf1ss_le_r,
4898 gen_helper_sve_ldnf1sdu_le_r,
4899
4900 gen_helper_sve_ldnf1bds_r,
4901 gen_helper_sve_ldnf1bss_r,
4902 gen_helper_sve_ldnf1bhs_r,
4903 gen_helper_sve_ldnf1dd_le_r },
4904
4905 /* mte inactive, big-endian */
4906 { gen_helper_sve_ldnf1bb_r,
4907 gen_helper_sve_ldnf1bhu_r,
4908 gen_helper_sve_ldnf1bsu_r,
4909 gen_helper_sve_ldnf1bdu_r,
4910
4911 gen_helper_sve_ldnf1sds_be_r,
4912 gen_helper_sve_ldnf1hh_be_r,
4913 gen_helper_sve_ldnf1hsu_be_r,
4914 gen_helper_sve_ldnf1hdu_be_r,
4915
4916 gen_helper_sve_ldnf1hds_be_r,
4917 gen_helper_sve_ldnf1hss_be_r,
4918 gen_helper_sve_ldnf1ss_be_r,
4919 gen_helper_sve_ldnf1sdu_be_r,
4920
4921 gen_helper_sve_ldnf1bds_r,
4922 gen_helper_sve_ldnf1bss_r,
4923 gen_helper_sve_ldnf1bhs_r,
4924 gen_helper_sve_ldnf1dd_be_r } },
4925
4926 { /* mte inactive, little-endian */
4927 { gen_helper_sve_ldnf1bb_r_mte,
4928 gen_helper_sve_ldnf1bhu_r_mte,
4929 gen_helper_sve_ldnf1bsu_r_mte,
4930 gen_helper_sve_ldnf1bdu_r_mte,
4931
4932 gen_helper_sve_ldnf1sds_le_r_mte,
4933 gen_helper_sve_ldnf1hh_le_r_mte,
4934 gen_helper_sve_ldnf1hsu_le_r_mte,
4935 gen_helper_sve_ldnf1hdu_le_r_mte,
4936
4937 gen_helper_sve_ldnf1hds_le_r_mte,
4938 gen_helper_sve_ldnf1hss_le_r_mte,
4939 gen_helper_sve_ldnf1ss_le_r_mte,
4940 gen_helper_sve_ldnf1sdu_le_r_mte,
4941
4942 gen_helper_sve_ldnf1bds_r_mte,
4943 gen_helper_sve_ldnf1bss_r_mte,
4944 gen_helper_sve_ldnf1bhs_r_mte,
4945 gen_helper_sve_ldnf1dd_le_r_mte },
4946
4947 /* mte inactive, big-endian */
4948 { gen_helper_sve_ldnf1bb_r_mte,
4949 gen_helper_sve_ldnf1bhu_r_mte,
4950 gen_helper_sve_ldnf1bsu_r_mte,
4951 gen_helper_sve_ldnf1bdu_r_mte,
4952
4953 gen_helper_sve_ldnf1sds_be_r_mte,
4954 gen_helper_sve_ldnf1hh_be_r_mte,
4955 gen_helper_sve_ldnf1hsu_be_r_mte,
4956 gen_helper_sve_ldnf1hdu_be_r_mte,
4957
4958 gen_helper_sve_ldnf1hds_be_r_mte,
4959 gen_helper_sve_ldnf1hss_be_r_mte,
4960 gen_helper_sve_ldnf1ss_be_r_mte,
4961 gen_helper_sve_ldnf1sdu_be_r_mte,
4962
4963 gen_helper_sve_ldnf1bds_r_mte,
4964 gen_helper_sve_ldnf1bss_r_mte,
4965 gen_helper_sve_ldnf1bhs_r_mte,
4966 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
4967 };
4968
1402a6b8
RH
4969 if (!dc_isar_feature(aa64_sve, s)) {
4970 return false;
4971 }
ccb1cefc 4972 s->is_nonstreaming = true;
e2654d75
RH
4973 if (sve_access_check(s)) {
4974 int vsz = vec_full_reg_size(s);
4975 int elements = vsz >> dtype_esz[a->dtype];
4976 int off = (a->imm * elements) << dtype_msz(a->dtype);
4977 TCGv_i64 addr = new_tmp_a64(s);
4978
4979 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
4980 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4981 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4982 }
4983 return true;
4984}
1a039c7e 4985
c182c6db 4986static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
05abe304 4987{
05abe304
RH
4988 unsigned vsz = vec_full_reg_size(s);
4989 TCGv_ptr t_pg;
7924d239 4990 int poff;
05abe304
RH
4991
4992 /* Load the first quadword using the normal predicated load helpers. */
2a99ab2b
RH
4993 poff = pred_full_reg_offset(s, pg);
4994 if (vsz > 16) {
4995 /*
4996 * Zero-extend the first 16 bits of the predicate into a temporary.
4997 * This avoids triggering an assert making sure we don't have bits
4998 * set within a predicate beyond VQ, but we have lowered VQ to 1
4999 * for this load operation.
5000 */
5001 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5002#if HOST_BIG_ENDIAN
2a99ab2b
RH
5003 poff += 6;
5004#endif
5005 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5006
5007 poff = offsetof(CPUARMState, vfp.preg_tmp);
5008 tcg_gen_st_i64(tmp, cpu_env, poff);
5009 tcg_temp_free_i64(tmp);
5010 }
5011
05abe304 5012 t_pg = tcg_temp_new_ptr();
2a99ab2b 5013 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 5014
c182c6db
RH
5015 gen_helper_gvec_mem *fn
5016 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
7924d239 5017 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
05abe304
RH
5018
5019 tcg_temp_free_ptr(t_pg);
05abe304
RH
5020
5021 /* Replicate that first quadword. */
5022 if (vsz > 16) {
7924d239
RH
5023 int doff = vec_full_reg_offset(s, zt);
5024 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
05abe304
RH
5025 }
5026}
5027
3a7be554 5028static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304 5029{
1402a6b8 5030 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
05abe304
RH
5031 return false;
5032 }
5033 if (sve_access_check(s)) {
5034 int msz = dtype_msz(a->dtype);
5035 TCGv_i64 addr = new_tmp_a64(s);
5036 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5037 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5038 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5039 }
5040 return true;
5041}
5042
3a7be554 5043static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304 5044{
1402a6b8
RH
5045 if (!dc_isar_feature(aa64_sve, s)) {
5046 return false;
5047 }
05abe304
RH
5048 if (sve_access_check(s)) {
5049 TCGv_i64 addr = new_tmp_a64(s);
5050 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5051 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5052 }
5053 return true;
5054}
5055
12c563f6
RH
5056static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
5057{
5058 unsigned vsz = vec_full_reg_size(s);
5059 unsigned vsz_r32;
5060 TCGv_ptr t_pg;
5061 int poff, doff;
5062
5063 if (vsz < 32) {
5064 /*
5065 * Note that this UNDEFINED check comes after CheckSVEEnabled()
5066 * in the ARM pseudocode, which is the sve_access_check() done
5067 * in our caller. We should not now return false from the caller.
5068 */
5069 unallocated_encoding(s);
5070 return;
5071 }
5072
5073 /* Load the first octaword using the normal predicated load helpers. */
5074
5075 poff = pred_full_reg_offset(s, pg);
5076 if (vsz > 32) {
5077 /*
5078 * Zero-extend the first 32 bits of the predicate into a temporary.
5079 * This avoids triggering an assert making sure we don't have bits
5080 * set within a predicate beyond VQ, but we have lowered VQ to 2
5081 * for this load operation.
5082 */
5083 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5084#if HOST_BIG_ENDIAN
12c563f6
RH
5085 poff += 4;
5086#endif
5087 tcg_gen_ld32u_i64(tmp, cpu_env, poff);
5088
5089 poff = offsetof(CPUARMState, vfp.preg_tmp);
5090 tcg_gen_st_i64(tmp, cpu_env, poff);
5091 tcg_temp_free_i64(tmp);
5092 }
5093
5094 t_pg = tcg_temp_new_ptr();
5095 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
5096
5097 gen_helper_gvec_mem *fn
5098 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
5099 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
5100
5101 tcg_temp_free_ptr(t_pg);
5102
5103 /*
5104 * Replicate that first octaword.
5105 * The replication happens in units of 32; if the full vector size
5106 * is not a multiple of 32, the final bits are zeroed.
5107 */
5108 doff = vec_full_reg_offset(s, zt);
5109 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
5110 if (vsz >= 64) {
5111 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
5112 }
5113 vsz -= vsz_r32;
5114 if (vsz) {
5115 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
5116 }
5117}
5118
5119static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5120{
5121 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5122 return false;
5123 }
5124 if (a->rm == 31) {
5125 return false;
5126 }
3ebc26e7 5127 s->is_nonstreaming = true;
12c563f6
RH
5128 if (sve_access_check(s)) {
5129 TCGv_i64 addr = new_tmp_a64(s);
5130 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5131 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5132 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5133 }
5134 return true;
5135}
5136
5137static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5138{
5139 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5140 return false;
5141 }
3ebc26e7 5142 s->is_nonstreaming = true;
12c563f6
RH
5143 if (sve_access_check(s)) {
5144 TCGv_i64 addr = new_tmp_a64(s);
5145 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5146 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5147 }
5148 return true;
5149}
5150
68459864 5151/* Load and broadcast element. */
3a7be554 5152static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 5153{
68459864
RH
5154 unsigned vsz = vec_full_reg_size(s);
5155 unsigned psz = pred_full_reg_size(s);
5156 unsigned esz = dtype_esz[a->dtype];
d0e372b0 5157 unsigned msz = dtype_msz(a->dtype);
c0ed9166 5158 TCGLabel *over;
4ac430e1 5159 TCGv_i64 temp, clean_addr;
68459864 5160
1402a6b8
RH
5161 if (!dc_isar_feature(aa64_sve, s)) {
5162 return false;
5163 }
c0ed9166
RH
5164 if (!sve_access_check(s)) {
5165 return true;
5166 }
5167
5168 over = gen_new_label();
5169
68459864
RH
5170 /* If the guarding predicate has no bits set, no load occurs. */
5171 if (psz <= 8) {
5172 /* Reduce the pred_esz_masks value simply to reduce the
5173 * size of the code generated here.
5174 */
5175 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5176 temp = tcg_temp_new_i64();
5177 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5178 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5179 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5180 tcg_temp_free_i64(temp);
5181 } else {
5182 TCGv_i32 t32 = tcg_temp_new_i32();
5183 find_last_active(s, t32, esz, a->pg);
5184 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5185 tcg_temp_free_i32(t32);
5186 }
5187
5188 /* Load the data. */
5189 temp = tcg_temp_new_i64();
d0e372b0 5190 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5191 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5192
5193 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
0ca0f872 5194 finalize_memop(s, dtype_mop[a->dtype]));
68459864
RH
5195
5196 /* Broadcast to *all* elements. */
5197 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5198 vsz, vsz, temp);
5199 tcg_temp_free_i64(temp);
5200
5201 /* Zero the inactive elements. */
5202 gen_set_label(over);
60245996 5203 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5204}
5205
1a039c7e
RH
5206static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5207 int msz, int esz, int nreg)
5208{
71b9f394
RH
5209 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5210 { { { gen_helper_sve_st1bb_r,
5211 gen_helper_sve_st1bh_r,
5212 gen_helper_sve_st1bs_r,
5213 gen_helper_sve_st1bd_r },
5214 { NULL,
5215 gen_helper_sve_st1hh_le_r,
5216 gen_helper_sve_st1hs_le_r,
5217 gen_helper_sve_st1hd_le_r },
5218 { NULL, NULL,
5219 gen_helper_sve_st1ss_le_r,
5220 gen_helper_sve_st1sd_le_r },
5221 { NULL, NULL, NULL,
5222 gen_helper_sve_st1dd_le_r } },
5223 { { gen_helper_sve_st1bb_r,
5224 gen_helper_sve_st1bh_r,
5225 gen_helper_sve_st1bs_r,
5226 gen_helper_sve_st1bd_r },
5227 { NULL,
5228 gen_helper_sve_st1hh_be_r,
5229 gen_helper_sve_st1hs_be_r,
5230 gen_helper_sve_st1hd_be_r },
5231 { NULL, NULL,
5232 gen_helper_sve_st1ss_be_r,
5233 gen_helper_sve_st1sd_be_r },
5234 { NULL, NULL, NULL,
5235 gen_helper_sve_st1dd_be_r } } },
5236
5237 { { { gen_helper_sve_st1bb_r_mte,
5238 gen_helper_sve_st1bh_r_mte,
5239 gen_helper_sve_st1bs_r_mte,
5240 gen_helper_sve_st1bd_r_mte },
5241 { NULL,
5242 gen_helper_sve_st1hh_le_r_mte,
5243 gen_helper_sve_st1hs_le_r_mte,
5244 gen_helper_sve_st1hd_le_r_mte },
5245 { NULL, NULL,
5246 gen_helper_sve_st1ss_le_r_mte,
5247 gen_helper_sve_st1sd_le_r_mte },
5248 { NULL, NULL, NULL,
5249 gen_helper_sve_st1dd_le_r_mte } },
5250 { { gen_helper_sve_st1bb_r_mte,
5251 gen_helper_sve_st1bh_r_mte,
5252 gen_helper_sve_st1bs_r_mte,
5253 gen_helper_sve_st1bd_r_mte },
5254 { NULL,
5255 gen_helper_sve_st1hh_be_r_mte,
5256 gen_helper_sve_st1hs_be_r_mte,
5257 gen_helper_sve_st1hd_be_r_mte },
5258 { NULL, NULL,
5259 gen_helper_sve_st1ss_be_r_mte,
5260 gen_helper_sve_st1sd_be_r_mte },
5261 { NULL, NULL, NULL,
5262 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5263 };
71b9f394
RH
5264 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5265 { { { gen_helper_sve_st2bb_r,
5266 gen_helper_sve_st2hh_le_r,
5267 gen_helper_sve_st2ss_le_r,
5268 gen_helper_sve_st2dd_le_r },
5269 { gen_helper_sve_st3bb_r,
5270 gen_helper_sve_st3hh_le_r,
5271 gen_helper_sve_st3ss_le_r,
5272 gen_helper_sve_st3dd_le_r },
5273 { gen_helper_sve_st4bb_r,
5274 gen_helper_sve_st4hh_le_r,
5275 gen_helper_sve_st4ss_le_r,
5276 gen_helper_sve_st4dd_le_r } },
5277 { { gen_helper_sve_st2bb_r,
5278 gen_helper_sve_st2hh_be_r,
5279 gen_helper_sve_st2ss_be_r,
5280 gen_helper_sve_st2dd_be_r },
5281 { gen_helper_sve_st3bb_r,
5282 gen_helper_sve_st3hh_be_r,
5283 gen_helper_sve_st3ss_be_r,
5284 gen_helper_sve_st3dd_be_r },
5285 { gen_helper_sve_st4bb_r,
5286 gen_helper_sve_st4hh_be_r,
5287 gen_helper_sve_st4ss_be_r,
5288 gen_helper_sve_st4dd_be_r } } },
5289 { { { gen_helper_sve_st2bb_r_mte,
5290 gen_helper_sve_st2hh_le_r_mte,
5291 gen_helper_sve_st2ss_le_r_mte,
5292 gen_helper_sve_st2dd_le_r_mte },
5293 { gen_helper_sve_st3bb_r_mte,
5294 gen_helper_sve_st3hh_le_r_mte,
5295 gen_helper_sve_st3ss_le_r_mte,
5296 gen_helper_sve_st3dd_le_r_mte },
5297 { gen_helper_sve_st4bb_r_mte,
5298 gen_helper_sve_st4hh_le_r_mte,
5299 gen_helper_sve_st4ss_le_r_mte,
5300 gen_helper_sve_st4dd_le_r_mte } },
5301 { { gen_helper_sve_st2bb_r_mte,
5302 gen_helper_sve_st2hh_be_r_mte,
5303 gen_helper_sve_st2ss_be_r_mte,
5304 gen_helper_sve_st2dd_be_r_mte },
5305 { gen_helper_sve_st3bb_r_mte,
5306 gen_helper_sve_st3hh_be_r_mte,
5307 gen_helper_sve_st3ss_be_r_mte,
5308 gen_helper_sve_st3dd_be_r_mte },
5309 { gen_helper_sve_st4bb_r_mte,
5310 gen_helper_sve_st4hh_be_r_mte,
5311 gen_helper_sve_st4ss_be_r_mte,
5312 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5313 };
5314 gen_helper_gvec_mem *fn;
28d57f2d 5315 int be = s->be_data == MO_BE;
1a039c7e
RH
5316
5317 if (nreg == 0) {
5318 /* ST1 */
71b9f394
RH
5319 fn = fn_single[s->mte_active[0]][be][msz][esz];
5320 nreg = 1;
1a039c7e
RH
5321 } else {
5322 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5323 assert(msz == esz);
71b9f394 5324 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5325 }
5326 assert(fn != NULL);
71b9f394 5327 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5328}
5329
3a7be554 5330static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e 5331{
1402a6b8
RH
5332 if (!dc_isar_feature(aa64_sve, s)) {
5333 return false;
5334 }
1a039c7e
RH
5335 if (a->rm == 31 || a->msz > a->esz) {
5336 return false;
5337 }
5338 if (sve_access_check(s)) {
5339 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5340 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5341 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5342 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5343 }
5344 return true;
5345}
5346
3a7be554 5347static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e 5348{
1402a6b8
RH
5349 if (!dc_isar_feature(aa64_sve, s)) {
5350 return false;
5351 }
1a039c7e
RH
5352 if (a->msz > a->esz) {
5353 return false;
5354 }
5355 if (sve_access_check(s)) {
5356 int vsz = vec_full_reg_size(s);
5357 int elements = vsz >> a->esz;
5358 TCGv_i64 addr = new_tmp_a64(s);
5359
5360 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5361 (a->imm * elements * (a->nreg + 1)) << a->msz);
5362 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5363 }
5364 return true;
5365}
f6dbf62a
RH
5366
5367/*
5368 *** SVE gather loads / scatter stores
5369 */
5370
500d0484 5371static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5372 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5373 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5374{
5375 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5376 TCGv_ptr t_zm = tcg_temp_new_ptr();
5377 TCGv_ptr t_pg = tcg_temp_new_ptr();
5378 TCGv_ptr t_zt = tcg_temp_new_ptr();
d28d12f0 5379 int desc = 0;
500d0484 5380
d28d12f0
RH
5381 if (s->mte_active[0]) {
5382 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5383 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5384 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5385 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5386 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
5387 desc <<= SVE_MTEDESC_SHIFT;
5388 }
cdecb3fc 5389 desc = simd_desc(vsz, vsz, desc | scale);
f6dbf62a
RH
5390
5391 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5392 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5393 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
c6a59b55 5394 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
f6dbf62a
RH
5395
5396 tcg_temp_free_ptr(t_zt);
5397 tcg_temp_free_ptr(t_zm);
5398 tcg_temp_free_ptr(t_pg);
f6dbf62a
RH
5399}
5400
d28d12f0
RH
5401/* Indexed by [mte][be][ff][xs][u][msz]. */
5402static gen_helper_gvec_mem_scatter * const
5403gather_load_fn32[2][2][2][2][2][3] = {
5404 { /* MTE Inactive */
5405 { /* Little-endian */
5406 { { { gen_helper_sve_ldbss_zsu,
5407 gen_helper_sve_ldhss_le_zsu,
5408 NULL, },
5409 { gen_helper_sve_ldbsu_zsu,
5410 gen_helper_sve_ldhsu_le_zsu,
5411 gen_helper_sve_ldss_le_zsu, } },
5412 { { gen_helper_sve_ldbss_zss,
5413 gen_helper_sve_ldhss_le_zss,
5414 NULL, },
5415 { gen_helper_sve_ldbsu_zss,
5416 gen_helper_sve_ldhsu_le_zss,
5417 gen_helper_sve_ldss_le_zss, } } },
5418
5419 /* First-fault */
5420 { { { gen_helper_sve_ldffbss_zsu,
5421 gen_helper_sve_ldffhss_le_zsu,
5422 NULL, },
5423 { gen_helper_sve_ldffbsu_zsu,
5424 gen_helper_sve_ldffhsu_le_zsu,
5425 gen_helper_sve_ldffss_le_zsu, } },
5426 { { gen_helper_sve_ldffbss_zss,
5427 gen_helper_sve_ldffhss_le_zss,
5428 NULL, },
5429 { gen_helper_sve_ldffbsu_zss,
5430 gen_helper_sve_ldffhsu_le_zss,
5431 gen_helper_sve_ldffss_le_zss, } } } },
5432
5433 { /* Big-endian */
5434 { { { gen_helper_sve_ldbss_zsu,
5435 gen_helper_sve_ldhss_be_zsu,
5436 NULL, },
5437 { gen_helper_sve_ldbsu_zsu,
5438 gen_helper_sve_ldhsu_be_zsu,
5439 gen_helper_sve_ldss_be_zsu, } },
5440 { { gen_helper_sve_ldbss_zss,
5441 gen_helper_sve_ldhss_be_zss,
5442 NULL, },
5443 { gen_helper_sve_ldbsu_zss,
5444 gen_helper_sve_ldhsu_be_zss,
5445 gen_helper_sve_ldss_be_zss, } } },
5446
5447 /* First-fault */
5448 { { { gen_helper_sve_ldffbss_zsu,
5449 gen_helper_sve_ldffhss_be_zsu,
5450 NULL, },
5451 { gen_helper_sve_ldffbsu_zsu,
5452 gen_helper_sve_ldffhsu_be_zsu,
5453 gen_helper_sve_ldffss_be_zsu, } },
5454 { { gen_helper_sve_ldffbss_zss,
5455 gen_helper_sve_ldffhss_be_zss,
5456 NULL, },
5457 { gen_helper_sve_ldffbsu_zss,
5458 gen_helper_sve_ldffhsu_be_zss,
5459 gen_helper_sve_ldffss_be_zss, } } } } },
5460 { /* MTE Active */
5461 { /* Little-endian */
5462 { { { gen_helper_sve_ldbss_zsu_mte,
5463 gen_helper_sve_ldhss_le_zsu_mte,
5464 NULL, },
5465 { gen_helper_sve_ldbsu_zsu_mte,
5466 gen_helper_sve_ldhsu_le_zsu_mte,
5467 gen_helper_sve_ldss_le_zsu_mte, } },
5468 { { gen_helper_sve_ldbss_zss_mte,
5469 gen_helper_sve_ldhss_le_zss_mte,
5470 NULL, },
5471 { gen_helper_sve_ldbsu_zss_mte,
5472 gen_helper_sve_ldhsu_le_zss_mte,
5473 gen_helper_sve_ldss_le_zss_mte, } } },
5474
5475 /* First-fault */
5476 { { { gen_helper_sve_ldffbss_zsu_mte,
5477 gen_helper_sve_ldffhss_le_zsu_mte,
5478 NULL, },
5479 { gen_helper_sve_ldffbsu_zsu_mte,
5480 gen_helper_sve_ldffhsu_le_zsu_mte,
5481 gen_helper_sve_ldffss_le_zsu_mte, } },
5482 { { gen_helper_sve_ldffbss_zss_mte,
5483 gen_helper_sve_ldffhss_le_zss_mte,
5484 NULL, },
5485 { gen_helper_sve_ldffbsu_zss_mte,
5486 gen_helper_sve_ldffhsu_le_zss_mte,
5487 gen_helper_sve_ldffss_le_zss_mte, } } } },
5488
5489 { /* Big-endian */
5490 { { { gen_helper_sve_ldbss_zsu_mte,
5491 gen_helper_sve_ldhss_be_zsu_mte,
5492 NULL, },
5493 { gen_helper_sve_ldbsu_zsu_mte,
5494 gen_helper_sve_ldhsu_be_zsu_mte,
5495 gen_helper_sve_ldss_be_zsu_mte, } },
5496 { { gen_helper_sve_ldbss_zss_mte,
5497 gen_helper_sve_ldhss_be_zss_mte,
5498 NULL, },
5499 { gen_helper_sve_ldbsu_zss_mte,
5500 gen_helper_sve_ldhsu_be_zss_mte,
5501 gen_helper_sve_ldss_be_zss_mte, } } },
5502
5503 /* First-fault */
5504 { { { gen_helper_sve_ldffbss_zsu_mte,
5505 gen_helper_sve_ldffhss_be_zsu_mte,
5506 NULL, },
5507 { gen_helper_sve_ldffbsu_zsu_mte,
5508 gen_helper_sve_ldffhsu_be_zsu_mte,
5509 gen_helper_sve_ldffss_be_zsu_mte, } },
5510 { { gen_helper_sve_ldffbss_zss_mte,
5511 gen_helper_sve_ldffhss_be_zss_mte,
5512 NULL, },
5513 { gen_helper_sve_ldffbsu_zss_mte,
5514 gen_helper_sve_ldffhsu_be_zss_mte,
5515 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5516};
5517
5518/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5519static gen_helper_gvec_mem_scatter * const
5520gather_load_fn64[2][2][2][3][2][4] = {
5521 { /* MTE Inactive */
5522 { /* Little-endian */
5523 { { { gen_helper_sve_ldbds_zsu,
5524 gen_helper_sve_ldhds_le_zsu,
5525 gen_helper_sve_ldsds_le_zsu,
5526 NULL, },
5527 { gen_helper_sve_ldbdu_zsu,
5528 gen_helper_sve_ldhdu_le_zsu,
5529 gen_helper_sve_ldsdu_le_zsu,
5530 gen_helper_sve_lddd_le_zsu, } },
5531 { { gen_helper_sve_ldbds_zss,
5532 gen_helper_sve_ldhds_le_zss,
5533 gen_helper_sve_ldsds_le_zss,
5534 NULL, },
5535 { gen_helper_sve_ldbdu_zss,
5536 gen_helper_sve_ldhdu_le_zss,
5537 gen_helper_sve_ldsdu_le_zss,
5538 gen_helper_sve_lddd_le_zss, } },
5539 { { gen_helper_sve_ldbds_zd,
5540 gen_helper_sve_ldhds_le_zd,
5541 gen_helper_sve_ldsds_le_zd,
5542 NULL, },
5543 { gen_helper_sve_ldbdu_zd,
5544 gen_helper_sve_ldhdu_le_zd,
5545 gen_helper_sve_ldsdu_le_zd,
5546 gen_helper_sve_lddd_le_zd, } } },
5547
5548 /* First-fault */
5549 { { { gen_helper_sve_ldffbds_zsu,
5550 gen_helper_sve_ldffhds_le_zsu,
5551 gen_helper_sve_ldffsds_le_zsu,
5552 NULL, },
5553 { gen_helper_sve_ldffbdu_zsu,
5554 gen_helper_sve_ldffhdu_le_zsu,
5555 gen_helper_sve_ldffsdu_le_zsu,
5556 gen_helper_sve_ldffdd_le_zsu, } },
5557 { { gen_helper_sve_ldffbds_zss,
5558 gen_helper_sve_ldffhds_le_zss,
5559 gen_helper_sve_ldffsds_le_zss,
5560 NULL, },
5561 { gen_helper_sve_ldffbdu_zss,
5562 gen_helper_sve_ldffhdu_le_zss,
5563 gen_helper_sve_ldffsdu_le_zss,
5564 gen_helper_sve_ldffdd_le_zss, } },
5565 { { gen_helper_sve_ldffbds_zd,
5566 gen_helper_sve_ldffhds_le_zd,
5567 gen_helper_sve_ldffsds_le_zd,
5568 NULL, },
5569 { gen_helper_sve_ldffbdu_zd,
5570 gen_helper_sve_ldffhdu_le_zd,
5571 gen_helper_sve_ldffsdu_le_zd,
5572 gen_helper_sve_ldffdd_le_zd, } } } },
5573 { /* Big-endian */
5574 { { { gen_helper_sve_ldbds_zsu,
5575 gen_helper_sve_ldhds_be_zsu,
5576 gen_helper_sve_ldsds_be_zsu,
5577 NULL, },
5578 { gen_helper_sve_ldbdu_zsu,
5579 gen_helper_sve_ldhdu_be_zsu,
5580 gen_helper_sve_ldsdu_be_zsu,
5581 gen_helper_sve_lddd_be_zsu, } },
5582 { { gen_helper_sve_ldbds_zss,
5583 gen_helper_sve_ldhds_be_zss,
5584 gen_helper_sve_ldsds_be_zss,
5585 NULL, },
5586 { gen_helper_sve_ldbdu_zss,
5587 gen_helper_sve_ldhdu_be_zss,
5588 gen_helper_sve_ldsdu_be_zss,
5589 gen_helper_sve_lddd_be_zss, } },
5590 { { gen_helper_sve_ldbds_zd,
5591 gen_helper_sve_ldhds_be_zd,
5592 gen_helper_sve_ldsds_be_zd,
5593 NULL, },
5594 { gen_helper_sve_ldbdu_zd,
5595 gen_helper_sve_ldhdu_be_zd,
5596 gen_helper_sve_ldsdu_be_zd,
5597 gen_helper_sve_lddd_be_zd, } } },
5598
5599 /* First-fault */
5600 { { { gen_helper_sve_ldffbds_zsu,
5601 gen_helper_sve_ldffhds_be_zsu,
5602 gen_helper_sve_ldffsds_be_zsu,
5603 NULL, },
5604 { gen_helper_sve_ldffbdu_zsu,
5605 gen_helper_sve_ldffhdu_be_zsu,
5606 gen_helper_sve_ldffsdu_be_zsu,
5607 gen_helper_sve_ldffdd_be_zsu, } },
5608 { { gen_helper_sve_ldffbds_zss,
5609 gen_helper_sve_ldffhds_be_zss,
5610 gen_helper_sve_ldffsds_be_zss,
5611 NULL, },
5612 { gen_helper_sve_ldffbdu_zss,
5613 gen_helper_sve_ldffhdu_be_zss,
5614 gen_helper_sve_ldffsdu_be_zss,
5615 gen_helper_sve_ldffdd_be_zss, } },
5616 { { gen_helper_sve_ldffbds_zd,
5617 gen_helper_sve_ldffhds_be_zd,
5618 gen_helper_sve_ldffsds_be_zd,
5619 NULL, },
5620 { gen_helper_sve_ldffbdu_zd,
5621 gen_helper_sve_ldffhdu_be_zd,
5622 gen_helper_sve_ldffsdu_be_zd,
5623 gen_helper_sve_ldffdd_be_zd, } } } } },
5624 { /* MTE Active */
5625 { /* Little-endian */
5626 { { { gen_helper_sve_ldbds_zsu_mte,
5627 gen_helper_sve_ldhds_le_zsu_mte,
5628 gen_helper_sve_ldsds_le_zsu_mte,
5629 NULL, },
5630 { gen_helper_sve_ldbdu_zsu_mte,
5631 gen_helper_sve_ldhdu_le_zsu_mte,
5632 gen_helper_sve_ldsdu_le_zsu_mte,
5633 gen_helper_sve_lddd_le_zsu_mte, } },
5634 { { gen_helper_sve_ldbds_zss_mte,
5635 gen_helper_sve_ldhds_le_zss_mte,
5636 gen_helper_sve_ldsds_le_zss_mte,
5637 NULL, },
5638 { gen_helper_sve_ldbdu_zss_mte,
5639 gen_helper_sve_ldhdu_le_zss_mte,
5640 gen_helper_sve_ldsdu_le_zss_mte,
5641 gen_helper_sve_lddd_le_zss_mte, } },
5642 { { gen_helper_sve_ldbds_zd_mte,
5643 gen_helper_sve_ldhds_le_zd_mte,
5644 gen_helper_sve_ldsds_le_zd_mte,
5645 NULL, },
5646 { gen_helper_sve_ldbdu_zd_mte,
5647 gen_helper_sve_ldhdu_le_zd_mte,
5648 gen_helper_sve_ldsdu_le_zd_mte,
5649 gen_helper_sve_lddd_le_zd_mte, } } },
5650
5651 /* First-fault */
5652 { { { gen_helper_sve_ldffbds_zsu_mte,
5653 gen_helper_sve_ldffhds_le_zsu_mte,
5654 gen_helper_sve_ldffsds_le_zsu_mte,
5655 NULL, },
5656 { gen_helper_sve_ldffbdu_zsu_mte,
5657 gen_helper_sve_ldffhdu_le_zsu_mte,
5658 gen_helper_sve_ldffsdu_le_zsu_mte,
5659 gen_helper_sve_ldffdd_le_zsu_mte, } },
5660 { { gen_helper_sve_ldffbds_zss_mte,
5661 gen_helper_sve_ldffhds_le_zss_mte,
5662 gen_helper_sve_ldffsds_le_zss_mte,
5663 NULL, },
5664 { gen_helper_sve_ldffbdu_zss_mte,
5665 gen_helper_sve_ldffhdu_le_zss_mte,
5666 gen_helper_sve_ldffsdu_le_zss_mte,
5667 gen_helper_sve_ldffdd_le_zss_mte, } },
5668 { { gen_helper_sve_ldffbds_zd_mte,
5669 gen_helper_sve_ldffhds_le_zd_mte,
5670 gen_helper_sve_ldffsds_le_zd_mte,
5671 NULL, },
5672 { gen_helper_sve_ldffbdu_zd_mte,
5673 gen_helper_sve_ldffhdu_le_zd_mte,
5674 gen_helper_sve_ldffsdu_le_zd_mte,
5675 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5676 { /* Big-endian */
5677 { { { gen_helper_sve_ldbds_zsu_mte,
5678 gen_helper_sve_ldhds_be_zsu_mte,
5679 gen_helper_sve_ldsds_be_zsu_mte,
5680 NULL, },
5681 { gen_helper_sve_ldbdu_zsu_mte,
5682 gen_helper_sve_ldhdu_be_zsu_mte,
5683 gen_helper_sve_ldsdu_be_zsu_mte,
5684 gen_helper_sve_lddd_be_zsu_mte, } },
5685 { { gen_helper_sve_ldbds_zss_mte,
5686 gen_helper_sve_ldhds_be_zss_mte,
5687 gen_helper_sve_ldsds_be_zss_mte,
5688 NULL, },
5689 { gen_helper_sve_ldbdu_zss_mte,
5690 gen_helper_sve_ldhdu_be_zss_mte,
5691 gen_helper_sve_ldsdu_be_zss_mte,
5692 gen_helper_sve_lddd_be_zss_mte, } },
5693 { { gen_helper_sve_ldbds_zd_mte,
5694 gen_helper_sve_ldhds_be_zd_mte,
5695 gen_helper_sve_ldsds_be_zd_mte,
5696 NULL, },
5697 { gen_helper_sve_ldbdu_zd_mte,
5698 gen_helper_sve_ldhdu_be_zd_mte,
5699 gen_helper_sve_ldsdu_be_zd_mte,
5700 gen_helper_sve_lddd_be_zd_mte, } } },
5701
5702 /* First-fault */
5703 { { { gen_helper_sve_ldffbds_zsu_mte,
5704 gen_helper_sve_ldffhds_be_zsu_mte,
5705 gen_helper_sve_ldffsds_be_zsu_mte,
5706 NULL, },
5707 { gen_helper_sve_ldffbdu_zsu_mte,
5708 gen_helper_sve_ldffhdu_be_zsu_mte,
5709 gen_helper_sve_ldffsdu_be_zsu_mte,
5710 gen_helper_sve_ldffdd_be_zsu_mte, } },
5711 { { gen_helper_sve_ldffbds_zss_mte,
5712 gen_helper_sve_ldffhds_be_zss_mte,
5713 gen_helper_sve_ldffsds_be_zss_mte,
5714 NULL, },
5715 { gen_helper_sve_ldffbdu_zss_mte,
5716 gen_helper_sve_ldffhdu_be_zss_mte,
5717 gen_helper_sve_ldffsdu_be_zss_mte,
5718 gen_helper_sve_ldffdd_be_zss_mte, } },
5719 { { gen_helper_sve_ldffbds_zd_mte,
5720 gen_helper_sve_ldffhds_be_zd_mte,
5721 gen_helper_sve_ldffsds_be_zd_mte,
5722 NULL, },
5723 { gen_helper_sve_ldffbdu_zd_mte,
5724 gen_helper_sve_ldffhdu_be_zd_mte,
5725 gen_helper_sve_ldffsdu_be_zd_mte,
5726 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
5727};
5728
3a7be554 5729static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5730{
5731 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5732 bool be = s->be_data == MO_BE;
5733 bool mte = s->mte_active[0];
673e9fa6 5734
1402a6b8
RH
5735 if (!dc_isar_feature(aa64_sve, s)) {
5736 return false;
5737 }
765ff97d 5738 s->is_nonstreaming = true;
673e9fa6
RH
5739 if (!sve_access_check(s)) {
5740 return true;
5741 }
5742
5743 switch (a->esz) {
5744 case MO_32:
d28d12f0 5745 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5746 break;
5747 case MO_64:
d28d12f0 5748 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5749 break;
5750 }
5751 assert(fn != NULL);
5752
5753 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5754 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5755 return true;
5756}
5757
3a7be554 5758static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5759{
5760 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5761 bool be = s->be_data == MO_BE;
5762 bool mte = s->mte_active[0];
673e9fa6
RH
5763
5764 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5765 return false;
5766 }
1402a6b8
RH
5767 if (!dc_isar_feature(aa64_sve, s)) {
5768 return false;
5769 }
765ff97d 5770 s->is_nonstreaming = true;
673e9fa6
RH
5771 if (!sve_access_check(s)) {
5772 return true;
5773 }
5774
5775 switch (a->esz) {
5776 case MO_32:
d28d12f0 5777 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5778 break;
5779 case MO_64:
d28d12f0 5780 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5781 break;
5782 }
5783 assert(fn != NULL);
5784
5785 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5786 * by loading the immediate into the scalar parameter.
5787 */
2ccdf94f
RH
5788 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5789 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
5790 return true;
5791}
5792
cf327449
SL
5793static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
5794{
b17ab470
RH
5795 gen_helper_gvec_mem_scatter *fn = NULL;
5796 bool be = s->be_data == MO_BE;
5797 bool mte = s->mte_active[0];
5798
5799 if (a->esz < a->msz + !a->u) {
5800 return false;
5801 }
cf327449
SL
5802 if (!dc_isar_feature(aa64_sve2, s)) {
5803 return false;
5804 }
765ff97d 5805 s->is_nonstreaming = true;
b17ab470
RH
5806 if (!sve_access_check(s)) {
5807 return true;
5808 }
5809
5810 switch (a->esz) {
5811 case MO_32:
5812 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
5813 break;
5814 case MO_64:
5815 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
5816 break;
5817 }
5818 assert(fn != NULL);
5819
5820 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5821 cpu_reg(s, a->rm), a->msz, false, fn);
5822 return true;
cf327449
SL
5823}
5824
d28d12f0
RH
5825/* Indexed by [mte][be][xs][msz]. */
5826static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5827 { /* MTE Inactive */
5828 { /* Little-endian */
5829 { gen_helper_sve_stbs_zsu,
5830 gen_helper_sve_sths_le_zsu,
5831 gen_helper_sve_stss_le_zsu, },
5832 { gen_helper_sve_stbs_zss,
5833 gen_helper_sve_sths_le_zss,
5834 gen_helper_sve_stss_le_zss, } },
5835 { /* Big-endian */
5836 { gen_helper_sve_stbs_zsu,
5837 gen_helper_sve_sths_be_zsu,
5838 gen_helper_sve_stss_be_zsu, },
5839 { gen_helper_sve_stbs_zss,
5840 gen_helper_sve_sths_be_zss,
5841 gen_helper_sve_stss_be_zss, } } },
5842 { /* MTE Active */
5843 { /* Little-endian */
5844 { gen_helper_sve_stbs_zsu_mte,
5845 gen_helper_sve_sths_le_zsu_mte,
5846 gen_helper_sve_stss_le_zsu_mte, },
5847 { gen_helper_sve_stbs_zss_mte,
5848 gen_helper_sve_sths_le_zss_mte,
5849 gen_helper_sve_stss_le_zss_mte, } },
5850 { /* Big-endian */
5851 { gen_helper_sve_stbs_zsu_mte,
5852 gen_helper_sve_sths_be_zsu_mte,
5853 gen_helper_sve_stss_be_zsu_mte, },
5854 { gen_helper_sve_stbs_zss_mte,
5855 gen_helper_sve_sths_be_zss_mte,
5856 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
5857};
5858
5859/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5860static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5861 { /* MTE Inactive */
5862 { /* Little-endian */
5863 { gen_helper_sve_stbd_zsu,
5864 gen_helper_sve_sthd_le_zsu,
5865 gen_helper_sve_stsd_le_zsu,
5866 gen_helper_sve_stdd_le_zsu, },
5867 { gen_helper_sve_stbd_zss,
5868 gen_helper_sve_sthd_le_zss,
5869 gen_helper_sve_stsd_le_zss,
5870 gen_helper_sve_stdd_le_zss, },
5871 { gen_helper_sve_stbd_zd,
5872 gen_helper_sve_sthd_le_zd,
5873 gen_helper_sve_stsd_le_zd,
5874 gen_helper_sve_stdd_le_zd, } },
5875 { /* Big-endian */
5876 { gen_helper_sve_stbd_zsu,
5877 gen_helper_sve_sthd_be_zsu,
5878 gen_helper_sve_stsd_be_zsu,
5879 gen_helper_sve_stdd_be_zsu, },
5880 { gen_helper_sve_stbd_zss,
5881 gen_helper_sve_sthd_be_zss,
5882 gen_helper_sve_stsd_be_zss,
5883 gen_helper_sve_stdd_be_zss, },
5884 { gen_helper_sve_stbd_zd,
5885 gen_helper_sve_sthd_be_zd,
5886 gen_helper_sve_stsd_be_zd,
5887 gen_helper_sve_stdd_be_zd, } } },
5888 { /* MTE Inactive */
5889 { /* Little-endian */
5890 { gen_helper_sve_stbd_zsu_mte,
5891 gen_helper_sve_sthd_le_zsu_mte,
5892 gen_helper_sve_stsd_le_zsu_mte,
5893 gen_helper_sve_stdd_le_zsu_mte, },
5894 { gen_helper_sve_stbd_zss_mte,
5895 gen_helper_sve_sthd_le_zss_mte,
5896 gen_helper_sve_stsd_le_zss_mte,
5897 gen_helper_sve_stdd_le_zss_mte, },
5898 { gen_helper_sve_stbd_zd_mte,
5899 gen_helper_sve_sthd_le_zd_mte,
5900 gen_helper_sve_stsd_le_zd_mte,
5901 gen_helper_sve_stdd_le_zd_mte, } },
5902 { /* Big-endian */
5903 { gen_helper_sve_stbd_zsu_mte,
5904 gen_helper_sve_sthd_be_zsu_mte,
5905 gen_helper_sve_stsd_be_zsu_mte,
5906 gen_helper_sve_stdd_be_zsu_mte, },
5907 { gen_helper_sve_stbd_zss_mte,
5908 gen_helper_sve_sthd_be_zss_mte,
5909 gen_helper_sve_stsd_be_zss_mte,
5910 gen_helper_sve_stdd_be_zss_mte, },
5911 { gen_helper_sve_stbd_zd_mte,
5912 gen_helper_sve_sthd_be_zd_mte,
5913 gen_helper_sve_stsd_be_zd_mte,
5914 gen_helper_sve_stdd_be_zd_mte, } } },
408ecde9
RH
5915};
5916
3a7be554 5917static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 5918{
f6dbf62a 5919 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
5920 bool be = s->be_data == MO_BE;
5921 bool mte = s->mte_active[0];
f6dbf62a
RH
5922
5923 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5924 return false;
5925 }
1402a6b8
RH
5926 if (!dc_isar_feature(aa64_sve, s)) {
5927 return false;
5928 }
765ff97d 5929 s->is_nonstreaming = true;
f6dbf62a
RH
5930 if (!sve_access_check(s)) {
5931 return true;
5932 }
5933 switch (a->esz) {
5934 case MO_32:
d28d12f0 5935 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
5936 break;
5937 case MO_64:
d28d12f0 5938 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
5939 break;
5940 default:
5941 g_assert_not_reached();
5942 }
5943 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5944 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
5945 return true;
5946}
dec6cf6b 5947
3a7be554 5948static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
5949{
5950 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5951 bool be = s->be_data == MO_BE;
5952 bool mte = s->mte_active[0];
408ecde9
RH
5953
5954 if (a->esz < a->msz) {
5955 return false;
5956 }
1402a6b8
RH
5957 if (!dc_isar_feature(aa64_sve, s)) {
5958 return false;
5959 }
765ff97d 5960 s->is_nonstreaming = true;
408ecde9
RH
5961 if (!sve_access_check(s)) {
5962 return true;
5963 }
5964
5965 switch (a->esz) {
5966 case MO_32:
d28d12f0 5967 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
5968 break;
5969 case MO_64:
d28d12f0 5970 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
5971 break;
5972 }
5973 assert(fn != NULL);
5974
5975 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5976 * by loading the immediate into the scalar parameter.
5977 */
2ccdf94f
RH
5978 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5979 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
5980 return true;
5981}
5982
6ebca45f
SL
5983static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
5984{
b17ab470
RH
5985 gen_helper_gvec_mem_scatter *fn;
5986 bool be = s->be_data == MO_BE;
5987 bool mte = s->mte_active[0];
5988
5989 if (a->esz < a->msz) {
5990 return false;
5991 }
6ebca45f
SL
5992 if (!dc_isar_feature(aa64_sve2, s)) {
5993 return false;
5994 }
765ff97d 5995 s->is_nonstreaming = true;
b17ab470
RH
5996 if (!sve_access_check(s)) {
5997 return true;
5998 }
5999
6000 switch (a->esz) {
6001 case MO_32:
6002 fn = scatter_store_fn32[mte][be][0][a->msz];
6003 break;
6004 case MO_64:
6005 fn = scatter_store_fn64[mte][be][2][a->msz];
6006 break;
6007 default:
6008 g_assert_not_reached();
6009 }
6010
6011 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6012 cpu_reg(s, a->rm), a->msz, true, fn);
6013 return true;
6ebca45f
SL
6014}
6015
dec6cf6b
RH
6016/*
6017 * Prefetches
6018 */
6019
3a7be554 6020static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b 6021{
1402a6b8
RH
6022 if (!dc_isar_feature(aa64_sve, s)) {
6023 return false;
6024 }
dec6cf6b 6025 /* Prefetch is a nop within QEMU. */
2f95a3b0 6026 (void)sve_access_check(s);
dec6cf6b
RH
6027 return true;
6028}
6029
3a7be554 6030static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b 6031{
1402a6b8 6032 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
dec6cf6b
RH
6033 return false;
6034 }
6035 /* Prefetch is a nop within QEMU. */
2f95a3b0 6036 (void)sve_access_check(s);
dec6cf6b
RH
6037 return true;
6038}
a2103582 6039
e1d1a643
RH
6040static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
6041{
6042 if (!dc_isar_feature(aa64_sve, s)) {
6043 return false;
6044 }
6045 /* Prefetch is a nop within QEMU. */
6046 s->is_nonstreaming = true;
6047 (void)sve_access_check(s);
6048 return true;
6049}
6050
a2103582
RH
6051/*
6052 * Move Prefix
6053 *
6054 * TODO: The implementation so far could handle predicated merging movprfx.
6055 * The helper functions as written take an extra source register to
6056 * use in the operation, but the result is only written when predication
6057 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
6058 * to allow the final write back to the destination to be unconditional.
6059 * For predicated zeroing movprfx, we need to rearrange the helpers to
6060 * allow the final write back to zero inactives.
6061 *
6062 * In the meantime, just emit the moves.
6063 */
6064
4b0b37e9
RH
6065TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
6066TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
6067TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
5dad1ba5
RH
6068
6069/*
6070 * SVE2 Integer Multiply - Unpredicated
6071 */
6072
b262215b 6073TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
5dad1ba5 6074
bd394cf5
RH
6075static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
6076 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
6077 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
6078};
6079TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6080 smulh_zzz_fns[a->esz], a, 0)
5dad1ba5 6081
bd394cf5
RH
6082static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
6083 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
6084 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
6085};
6086TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6087 umulh_zzz_fns[a->esz], a, 0)
5dad1ba5 6088
bd394cf5
RH
6089TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6090 gen_helper_gvec_pmul_b, a, 0)
5dad1ba5 6091
bd394cf5
RH
6092static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
6093 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
6094 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
6095};
6096TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6097 sqdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6098
bd394cf5
RH
6099static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
6100 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
6101 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
6102};
6103TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6104 sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6105
d4b1e59d
RH
6106/*
6107 * SVE2 Integer - Predicated
6108 */
6109
5880bdc0
RH
6110static gen_helper_gvec_4 * const sadlp_fns[4] = {
6111 NULL, gen_helper_sve2_sadalp_zpzz_h,
6112 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
6113};
6114TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
6115 sadlp_fns[a->esz], a, 0)
d4b1e59d 6116
5880bdc0
RH
6117static gen_helper_gvec_4 * const uadlp_fns[4] = {
6118 NULL, gen_helper_sve2_uadalp_zpzz_h,
6119 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
6120};
6121TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
6122 uadlp_fns[a->esz], a, 0)
db366da8
RH
6123
6124/*
6125 * SVE2 integer unary operations (predicated)
6126 */
6127
b2c00961
RH
6128TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
6129 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)
db366da8 6130
b2c00961
RH
6131TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
6132 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
db366da8 6133
b2c00961
RH
6134static gen_helper_gvec_3 * const sqabs_fns[4] = {
6135 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
6136 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
6137};
6138TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)
db366da8 6139
b2c00961
RH
6140static gen_helper_gvec_3 * const sqneg_fns[4] = {
6141 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
6142 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
6143};
6144TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
45d9503d 6145
5880bdc0
RH
6146DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
6147DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
6148DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)
45d9503d 6149
5880bdc0
RH
6150DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
6151DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
6152DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)
a47dc220 6153
5880bdc0
RH
6154DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
6155DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
6156DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)
a47dc220 6157
5880bdc0
RH
6158DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
6159DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
6160DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)
8597dc8b 6161
5880bdc0
RH
6162DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
6163DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
6164DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
6165DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
6166DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)
4f07fbeb 6167
5880bdc0
RH
6168DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
6169DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
6170DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
6171DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
6172DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
6173DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
0ce1dda8
RH
6174
6175/*
6176 * SVE2 Widening Integer Arithmetic
6177 */
6178
615f19fe
RH
6179static gen_helper_gvec_3 * const saddl_fns[4] = {
6180 NULL, gen_helper_sve2_saddl_h,
6181 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
6182};
6183TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6184 saddl_fns[a->esz], a, 0)
6185TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6186 saddl_fns[a->esz], a, 3)
6187TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
6188 saddl_fns[a->esz], a, 2)
6189
6190static gen_helper_gvec_3 * const ssubl_fns[4] = {
6191 NULL, gen_helper_sve2_ssubl_h,
6192 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
6193};
6194TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6195 ssubl_fns[a->esz], a, 0)
6196TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6197 ssubl_fns[a->esz], a, 3)
6198TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
6199 ssubl_fns[a->esz], a, 2)
6200TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
6201 ssubl_fns[a->esz], a, 1)
6202
6203static gen_helper_gvec_3 * const sabdl_fns[4] = {
6204 NULL, gen_helper_sve2_sabdl_h,
6205 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
6206};
6207TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6208 sabdl_fns[a->esz], a, 0)
6209TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6210 sabdl_fns[a->esz], a, 3)
6211
6212static gen_helper_gvec_3 * const uaddl_fns[4] = {
6213 NULL, gen_helper_sve2_uaddl_h,
6214 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
6215};
6216TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6217 uaddl_fns[a->esz], a, 0)
6218TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6219 uaddl_fns[a->esz], a, 3)
6220
6221static gen_helper_gvec_3 * const usubl_fns[4] = {
6222 NULL, gen_helper_sve2_usubl_h,
6223 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
6224};
6225TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6226 usubl_fns[a->esz], a, 0)
6227TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6228 usubl_fns[a->esz], a, 3)
6229
6230static gen_helper_gvec_3 * const uabdl_fns[4] = {
6231 NULL, gen_helper_sve2_uabdl_h,
6232 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
6233};
6234TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6235 uabdl_fns[a->esz], a, 0)
6236TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6237 uabdl_fns[a->esz], a, 3)
6238
6239static gen_helper_gvec_3 * const sqdmull_fns[4] = {
6240 NULL, gen_helper_sve2_sqdmull_zzz_h,
6241 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
6242};
6243TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6244 sqdmull_fns[a->esz], a, 0)
6245TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6246 sqdmull_fns[a->esz], a, 3)
6247
6248static gen_helper_gvec_3 * const smull_fns[4] = {
6249 NULL, gen_helper_sve2_smull_zzz_h,
6250 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
6251};
6252TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6253 smull_fns[a->esz], a, 0)
6254TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6255 smull_fns[a->esz], a, 3)
6256
6257static gen_helper_gvec_3 * const umull_fns[4] = {
6258 NULL, gen_helper_sve2_umull_zzz_h,
6259 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
6260};
6261TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6262 umull_fns[a->esz], a, 0)
6263TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6264 umull_fns[a->esz], a, 3)
6265
6266static gen_helper_gvec_3 * const eoril_fns[4] = {
6267 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6268 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6269};
6270TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
6271TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
2df3ca55 6272
e3a56131
RH
6273static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6274{
6275 static gen_helper_gvec_3 * const fns[4] = {
6276 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6277 NULL, gen_helper_sve2_pmull_d,
6278 };
4464ee36
RH
6279
6280 if (a->esz == 0) {
6281 if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
6282 return false;
6283 }
6284 s->is_nonstreaming = true;
6285 } else if (!dc_isar_feature(aa64_sve, s)) {
e3a56131
RH
6286 return false;
6287 }
615f19fe 6288 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
e3a56131
RH
6289}
6290
615f19fe
RH
6291TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
6292TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
e3a56131 6293
615f19fe
RH
6294static gen_helper_gvec_3 * const saddw_fns[4] = {
6295 NULL, gen_helper_sve2_saddw_h,
6296 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
6297};
6298TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
6299TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
e3a56131 6300
615f19fe
RH
6301static gen_helper_gvec_3 * const ssubw_fns[4] = {
6302 NULL, gen_helper_sve2_ssubw_h,
6303 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
6304};
6305TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
6306TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
81fccf09 6307
615f19fe
RH
6308static gen_helper_gvec_3 * const uaddw_fns[4] = {
6309 NULL, gen_helper_sve2_uaddw_h,
6310 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
6311};
6312TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
6313TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
81fccf09 6314
615f19fe
RH
6315static gen_helper_gvec_3 * const usubw_fns[4] = {
6316 NULL, gen_helper_sve2_usubw_h,
6317 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
6318};
6319TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
6320TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
4269fef1
RH
6321
6322static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6323{
6324 int top = imm & 1;
6325 int shl = imm >> 1;
6326 int halfbits = 4 << vece;
6327
6328 if (top) {
6329 if (shl == halfbits) {
6330 TCGv_vec t = tcg_temp_new_vec_matching(d);
6331 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6332 tcg_gen_and_vec(vece, d, n, t);
6333 tcg_temp_free_vec(t);
6334 } else {
6335 tcg_gen_sari_vec(vece, d, n, halfbits);
6336 tcg_gen_shli_vec(vece, d, d, shl);
6337 }
6338 } else {
6339 tcg_gen_shli_vec(vece, d, n, halfbits);
6340 tcg_gen_sari_vec(vece, d, d, halfbits - shl);
6341 }
6342}
6343
6344static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
6345{
6346 int halfbits = 4 << vece;
6347 int top = imm & 1;
6348 int shl = (imm >> 1);
6349 int shift;
6350 uint64_t mask;
6351
6352 mask = MAKE_64BIT_MASK(0, halfbits);
6353 mask <<= shl;
6354 mask = dup_const(vece, mask);
6355
6356 shift = shl - top * halfbits;
6357 if (shift < 0) {
6358 tcg_gen_shri_i64(d, n, -shift);
6359 } else {
6360 tcg_gen_shli_i64(d, n, shift);
6361 }
6362 tcg_gen_andi_i64(d, d, mask);
6363}
6364
6365static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6366{
6367 gen_ushll_i64(MO_16, d, n, imm);
6368}
6369
6370static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6371{
6372 gen_ushll_i64(MO_32, d, n, imm);
6373}
6374
6375static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6376{
6377 gen_ushll_i64(MO_64, d, n, imm);
6378}
6379
6380static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6381{
6382 int halfbits = 4 << vece;
6383 int top = imm & 1;
6384 int shl = imm >> 1;
6385
6386 if (top) {
6387 if (shl == halfbits) {
6388 TCGv_vec t = tcg_temp_new_vec_matching(d);
6389 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6390 tcg_gen_and_vec(vece, d, n, t);
6391 tcg_temp_free_vec(t);
6392 } else {
6393 tcg_gen_shri_vec(vece, d, n, halfbits);
6394 tcg_gen_shli_vec(vece, d, d, shl);
6395 }
6396 } else {
6397 if (shl == 0) {
6398 TCGv_vec t = tcg_temp_new_vec_matching(d);
6399 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6400 tcg_gen_and_vec(vece, d, n, t);
6401 tcg_temp_free_vec(t);
6402 } else {
6403 tcg_gen_shli_vec(vece, d, n, halfbits);
6404 tcg_gen_shri_vec(vece, d, d, halfbits - shl);
6405 }
6406 }
6407}
6408
5a528bb5
RH
6409static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
6410 const GVecGen2i ops[3], bool sel)
4269fef1 6411{
4269fef1 6412
5a528bb5 6413 if (a->esz < 0 || a->esz > 2) {
4269fef1
RH
6414 return false;
6415 }
6416 if (sve_access_check(s)) {
6417 unsigned vsz = vec_full_reg_size(s);
6418 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6419 vec_full_reg_offset(s, a->rn),
6420 vsz, vsz, (a->imm << 1) | sel,
5a528bb5 6421 &ops[a->esz]);
4269fef1
RH
6422 }
6423 return true;
6424}
6425
5a528bb5
RH
6426static const TCGOpcode sshll_list[] = {
6427 INDEX_op_shli_vec, INDEX_op_sari_vec, 0
6428};
6429static const GVecGen2i sshll_ops[3] = {
6430 { .fniv = gen_sshll_vec,
6431 .opt_opc = sshll_list,
6432 .fno = gen_helper_sve2_sshll_h,
6433 .vece = MO_16 },
6434 { .fniv = gen_sshll_vec,
6435 .opt_opc = sshll_list,
6436 .fno = gen_helper_sve2_sshll_s,
6437 .vece = MO_32 },
6438 { .fniv = gen_sshll_vec,
6439 .opt_opc = sshll_list,
6440 .fno = gen_helper_sve2_sshll_d,
6441 .vece = MO_64 }
6442};
6443TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
6444TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)
4269fef1 6445
5a528bb5
RH
6446static const TCGOpcode ushll_list[] = {
6447 INDEX_op_shli_vec, INDEX_op_shri_vec, 0
6448};
6449static const GVecGen2i ushll_ops[3] = {
6450 { .fni8 = gen_ushll16_i64,
6451 .fniv = gen_ushll_vec,
6452 .opt_opc = ushll_list,
6453 .fno = gen_helper_sve2_ushll_h,
6454 .vece = MO_16 },
6455 { .fni8 = gen_ushll32_i64,
6456 .fniv = gen_ushll_vec,
6457 .opt_opc = ushll_list,
6458 .fno = gen_helper_sve2_ushll_s,
6459 .vece = MO_32 },
6460 { .fni8 = gen_ushll64_i64,
6461 .fniv = gen_ushll_vec,
6462 .opt_opc = ushll_list,
6463 .fno = gen_helper_sve2_ushll_d,
6464 .vece = MO_64 },
6465};
6466TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
6467TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)
cb9c33b8 6468
615f19fe
RH
6469static gen_helper_gvec_3 * const bext_fns[4] = {
6470 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
6471 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
6472};
ca363d23
RH
6473TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6474 bext_fns[a->esz], a, 0)
ed4a6387 6475
615f19fe
RH
6476static gen_helper_gvec_3 * const bdep_fns[4] = {
6477 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
6478 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
6479};
ca363d23
RH
6480TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6481 bdep_fns[a->esz], a, 0)
ed4a6387 6482
615f19fe
RH
6483static gen_helper_gvec_3 * const bgrp_fns[4] = {
6484 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
6485 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
6486};
ca363d23
RH
6487TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6488 bgrp_fns[a->esz], a, 0)
ed4a6387 6489
615f19fe
RH
6490static gen_helper_gvec_3 * const cadd_fns[4] = {
6491 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
6492 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
6493};
6494TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6495 cadd_fns[a->esz], a, 0)
6496TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6497 cadd_fns[a->esz], a, 1)
6498
6499static gen_helper_gvec_3 * const sqcadd_fns[4] = {
6500 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
6501 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
6502};
6503TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6504 sqcadd_fns[a->esz], a, 0)
6505TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6506 sqcadd_fns[a->esz], a, 1)
38650638 6507
eeb4e84d
RH
6508static gen_helper_gvec_4 * const sabal_fns[4] = {
6509 NULL, gen_helper_sve2_sabal_h,
6510 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
6511};
6512TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
6513TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
38650638 6514
eeb4e84d
RH
6515static gen_helper_gvec_4 * const uabal_fns[4] = {
6516 NULL, gen_helper_sve2_uabal_h,
6517 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
6518};
6519TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
6520TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
b8295dfb
RH
6521
6522static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
6523{
6524 static gen_helper_gvec_4 * const fns[2] = {
6525 gen_helper_sve2_adcl_s,
6526 gen_helper_sve2_adcl_d,
6527 };
6528 /*
6529 * Note that in this case the ESZ field encodes both size and sign.
6530 * Split out 'subtract' into bit 1 of the data field for the helper.
6531 */
eeb4e84d 6532 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
b8295dfb
RH
6533}
6534
eeb4e84d
RH
6535TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
6536TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e 6537
f2be26a5
RH
6538TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
6539TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
6540TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
6541TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
6542TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
6543TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)
289a1797 6544
79828dcb
RH
6545TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
6546TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
5ff2838d 6547
6100d084
RH
6548static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
6549 const GVecGen2 ops[3])
5ff2838d 6550{
6100d084 6551 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
5ff2838d
RH
6552 return false;
6553 }
6554 if (sve_access_check(s)) {
6555 unsigned vsz = vec_full_reg_size(s);
6556 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
6557 vec_full_reg_offset(s, a->rn),
6558 vsz, vsz, &ops[a->esz]);
6559 }
6560 return true;
6561}
6562
6563static const TCGOpcode sqxtn_list[] = {
6564 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
6565};
6566
6567static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6568{
6569 TCGv_vec t = tcg_temp_new_vec_matching(d);
6570 int halfbits = 4 << vece;
6571 int64_t mask = (1ull << halfbits) - 1;
6572 int64_t min = -1ull << (halfbits - 1);
6573 int64_t max = -min - 1;
6574
6575 tcg_gen_dupi_vec(vece, t, min);
6576 tcg_gen_smax_vec(vece, d, n, t);
6577 tcg_gen_dupi_vec(vece, t, max);
6578 tcg_gen_smin_vec(vece, d, d, t);
6579 tcg_gen_dupi_vec(vece, t, mask);
6580 tcg_gen_and_vec(vece, d, d, t);
6581 tcg_temp_free_vec(t);
6582}
6583
6100d084
RH
6584static const GVecGen2 sqxtnb_ops[3] = {
6585 { .fniv = gen_sqxtnb_vec,
6586 .opt_opc = sqxtn_list,
6587 .fno = gen_helper_sve2_sqxtnb_h,
6588 .vece = MO_16 },
6589 { .fniv = gen_sqxtnb_vec,
6590 .opt_opc = sqxtn_list,
6591 .fno = gen_helper_sve2_sqxtnb_s,
6592 .vece = MO_32 },
6593 { .fniv = gen_sqxtnb_vec,
6594 .opt_opc = sqxtn_list,
6595 .fno = gen_helper_sve2_sqxtnb_d,
6596 .vece = MO_64 },
6597};
6598TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
5ff2838d
RH
6599
6600static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6601{
6602 TCGv_vec t = tcg_temp_new_vec_matching(d);
6603 int halfbits = 4 << vece;
6604 int64_t mask = (1ull << halfbits) - 1;
6605 int64_t min = -1ull << (halfbits - 1);
6606 int64_t max = -min - 1;
6607
6608 tcg_gen_dupi_vec(vece, t, min);
6609 tcg_gen_smax_vec(vece, n, n, t);
6610 tcg_gen_dupi_vec(vece, t, max);
6611 tcg_gen_smin_vec(vece, n, n, t);
6612 tcg_gen_shli_vec(vece, n, n, halfbits);
6613 tcg_gen_dupi_vec(vece, t, mask);
6614 tcg_gen_bitsel_vec(vece, d, t, d, n);
6615 tcg_temp_free_vec(t);
6616}
6617
6100d084
RH
6618static const GVecGen2 sqxtnt_ops[3] = {
6619 { .fniv = gen_sqxtnt_vec,
6620 .opt_opc = sqxtn_list,
6621 .load_dest = true,
6622 .fno = gen_helper_sve2_sqxtnt_h,
6623 .vece = MO_16 },
6624 { .fniv = gen_sqxtnt_vec,
6625 .opt_opc = sqxtn_list,
6626 .load_dest = true,
6627 .fno = gen_helper_sve2_sqxtnt_s,
6628 .vece = MO_32 },
6629 { .fniv = gen_sqxtnt_vec,
6630 .opt_opc = sqxtn_list,
6631 .load_dest = true,
6632 .fno = gen_helper_sve2_sqxtnt_d,
6633 .vece = MO_64 },
6634};
6635TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)
5ff2838d
RH
6636
6637static const TCGOpcode uqxtn_list[] = {
6638 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
6639};
6640
6641static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6642{
6643 TCGv_vec t = tcg_temp_new_vec_matching(d);
6644 int halfbits = 4 << vece;
6645 int64_t max = (1ull << halfbits) - 1;
6646
6647 tcg_gen_dupi_vec(vece, t, max);
6648 tcg_gen_umin_vec(vece, d, n, t);
6649 tcg_temp_free_vec(t);
6650}
6651
6100d084
RH
6652static const GVecGen2 uqxtnb_ops[3] = {
6653 { .fniv = gen_uqxtnb_vec,
6654 .opt_opc = uqxtn_list,
6655 .fno = gen_helper_sve2_uqxtnb_h,
6656 .vece = MO_16 },
6657 { .fniv = gen_uqxtnb_vec,
6658 .opt_opc = uqxtn_list,
6659 .fno = gen_helper_sve2_uqxtnb_s,
6660 .vece = MO_32 },
6661 { .fniv = gen_uqxtnb_vec,
6662 .opt_opc = uqxtn_list,
6663 .fno = gen_helper_sve2_uqxtnb_d,
6664 .vece = MO_64 },
6665};
6666TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
5ff2838d
RH
6667
6668static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6669{
6670 TCGv_vec t = tcg_temp_new_vec_matching(d);
6671 int halfbits = 4 << vece;
6672 int64_t max = (1ull << halfbits) - 1;
6673
6674 tcg_gen_dupi_vec(vece, t, max);
6675 tcg_gen_umin_vec(vece, n, n, t);
6676 tcg_gen_shli_vec(vece, n, n, halfbits);
6677 tcg_gen_bitsel_vec(vece, d, t, d, n);
6678 tcg_temp_free_vec(t);
6679}
6680
6100d084
RH
6681static const GVecGen2 uqxtnt_ops[3] = {
6682 { .fniv = gen_uqxtnt_vec,
6683 .opt_opc = uqxtn_list,
6684 .load_dest = true,
6685 .fno = gen_helper_sve2_uqxtnt_h,
6686 .vece = MO_16 },
6687 { .fniv = gen_uqxtnt_vec,
6688 .opt_opc = uqxtn_list,
6689 .load_dest = true,
6690 .fno = gen_helper_sve2_uqxtnt_s,
6691 .vece = MO_32 },
6692 { .fniv = gen_uqxtnt_vec,
6693 .opt_opc = uqxtn_list,
6694 .load_dest = true,
6695 .fno = gen_helper_sve2_uqxtnt_d,
6696 .vece = MO_64 },
6697};
6698TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)
5ff2838d
RH
6699
6700static const TCGOpcode sqxtun_list[] = {
6701 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
6702};
6703
6704static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6705{
6706 TCGv_vec t = tcg_temp_new_vec_matching(d);
6707 int halfbits = 4 << vece;
6708 int64_t max = (1ull << halfbits) - 1;
6709
6710 tcg_gen_dupi_vec(vece, t, 0);
6711 tcg_gen_smax_vec(vece, d, n, t);
6712 tcg_gen_dupi_vec(vece, t, max);
6713 tcg_gen_umin_vec(vece, d, d, t);
6714 tcg_temp_free_vec(t);
6715}
6716
6100d084
RH
6717static const GVecGen2 sqxtunb_ops[3] = {
6718 { .fniv = gen_sqxtunb_vec,
6719 .opt_opc = sqxtun_list,
6720 .fno = gen_helper_sve2_sqxtunb_h,
6721 .vece = MO_16 },
6722 { .fniv = gen_sqxtunb_vec,
6723 .opt_opc = sqxtun_list,
6724 .fno = gen_helper_sve2_sqxtunb_s,
6725 .vece = MO_32 },
6726 { .fniv = gen_sqxtunb_vec,
6727 .opt_opc = sqxtun_list,
6728 .fno = gen_helper_sve2_sqxtunb_d,
6729 .vece = MO_64 },
6730};
6731TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
/*
 * SQXTUNT (vector expansion): signed saturating extract unsigned
 * narrow, top.  As gen_sqxtunb_vec, but the clamped value is shifted
 * into the top half of each wide element of D, preserving the bottom
 * halves.  Clobbers N.
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    /* Clamp to [0, max]. */
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    /* Position in the top half; t still holds the low-half mask. */
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* d = (d & t) | (n & ~t). */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
/* Expansion table for SQXTUNT, indexed by narrowed element size. */
static const GVecGen2 sqxtunt_ops[3] = {
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,       /* bottom halves of the destination survive */
      .fno = gen_helper_sve2_sqxtunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)
/*
 * Common expander for the SVE2 narrowing right-shift-by-immediate
 * group.  a->esz is the *narrowed* element size (MO_8..MO_32); ops[]
 * is indexed by it and operates on elements of twice that width.
 */
static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
                          const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32) {
        return false;
    }
    /* Decode guarantees 1 <= shift <= narrowed element width. */
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
/*
 * SHRNB on a 64-bit chunk: shift every wide element right by shr and
 * keep only the bottom half of each; dup_const replicates the low-half
 * mask across all elements packed in the i64.
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

/* Per-size i64 wrappers matching the GVecGen2i.fni8 signature. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}
/*
 * SHRNB (vector expansion): shift right narrow, bottom.
 * Shift each wide element right and mask to its bottom half.
 * Clobbers N.
 */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
6818
f7f2f0fa
RH
6819static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
6820static const GVecGen2i shrnb_ops[3] = {
6821 { .fni8 = gen_shrnb16_i64,
6822 .fniv = gen_shrnb_vec,
6823 .opt_opc = shrnb_vec_list,
6824 .fno = gen_helper_sve2_shrnb_h,
6825 .vece = MO_16 },
6826 { .fni8 = gen_shrnb32_i64,
6827 .fniv = gen_shrnb_vec,
6828 .opt_opc = shrnb_vec_list,
6829 .fno = gen_helper_sve2_shrnb_s,
6830 .vece = MO_32 },
6831 { .fni8 = gen_shrnb64_i64,
6832 .fniv = gen_shrnb_vec,
6833 .opt_opc = shrnb_vec_list,
6834 .fno = gen_helper_sve2_shrnb_d,
6835 .vece = MO_64 },
6836};
6837TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)
46d111b2
RH
6838
6839static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6840{
6841 int halfbits = 4 << vece;
6842 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6843
6844 tcg_gen_shli_i64(n, n, halfbits - shr);
6845 tcg_gen_andi_i64(n, n, ~mask);
6846 tcg_gen_andi_i64(d, d, mask);
6847 tcg_gen_or_i64(d, d, n);
6848}
6849
6850static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6851{
6852 gen_shrnt_i64(MO_16, d, n, shr);
6853}
6854
6855static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6856{
6857 gen_shrnt_i64(MO_32, d, n, shr);
6858}
6859
6860static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6861{
6862 tcg_gen_shri_i64(n, n, shr);
6863 tcg_gen_deposit_i64(d, d, n, 32, 32);
6864}
/*
 * SHRNT (vector expansion): shift right narrow, top.
 * d = (d & mask) | (shifted_n & ~mask), via bitsel.  Clobbers N.
 */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
6877
f7f2f0fa
RH
6878static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
6879static const GVecGen2i shrnt_ops[3] = {
6880 { .fni8 = gen_shrnt16_i64,
6881 .fniv = gen_shrnt_vec,
6882 .opt_opc = shrnt_vec_list,
6883 .load_dest = true,
6884 .fno = gen_helper_sve2_shrnt_h,
6885 .vece = MO_16 },
6886 { .fni8 = gen_shrnt32_i64,
6887 .fniv = gen_shrnt_vec,
6888 .opt_opc = shrnt_vec_list,
6889 .load_dest = true,
6890 .fno = gen_helper_sve2_shrnt_s,
6891 .vece = MO_32 },
6892 { .fni8 = gen_shrnt64_i64,
6893 .fniv = gen_shrnt_vec,
6894 .opt_opc = shrnt_vec_list,
6895 .load_dest = true,
6896 .fno = gen_helper_sve2_shrnt_d,
6897 .vece = MO_64 },
6898};
6899TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)
/*
 * RSHRNB/RSHRNT: rounding shift right narrow (bottom/top).
 * The rounding variants have no inline vector expansion; only the
 * out-of-line helpers are provided.
 */
static const GVecGen2i rshrnb_ops[3] = {
    { .fno = gen_helper_sve2_rshrnb_h },
    { .fno = gen_helper_sve2_rshrnb_s },
    { .fno = gen_helper_sve2_rshrnb_d },
};
TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)

static const GVecGen2i rshrnt_ops[3] = {
    { .fno = gen_helper_sve2_rshrnt_h },
    { .fno = gen_helper_sve2_rshrnt_s },
    { .fno = gen_helper_sve2_rshrnt_d },
};
TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
/*
 * SQSHRUNB (vector expansion): signed saturating shift right unsigned
 * narrow, bottom.  Arithmetic shift, clamp to [0, half-width max],
 * write to D.  Clobbers N.
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    /* Clamp negative results to zero. */
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    /* Unsigned saturate to the half-width maximum. */
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
6928
f7f2f0fa
RH
6929static const TCGOpcode sqshrunb_vec_list[] = {
6930 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6931};
6932static const GVecGen2i sqshrunb_ops[3] = {
6933 { .fniv = gen_sqshrunb_vec,
6934 .opt_opc = sqshrunb_vec_list,
6935 .fno = gen_helper_sve2_sqshrunb_h,
6936 .vece = MO_16 },
6937 { .fniv = gen_sqshrunb_vec,
6938 .opt_opc = sqshrunb_vec_list,
6939 .fno = gen_helper_sve2_sqshrunb_s,
6940 .vece = MO_32 },
6941 { .fniv = gen_sqshrunb_vec,
6942 .opt_opc = sqshrunb_vec_list,
6943 .fno = gen_helper_sve2_sqshrunb_d,
6944 .vece = MO_64 },
6945};
6946TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
/*
 * SQSHRUNT (vector expansion): as gen_sqshrunb_vec, but deposit into
 * the top half of each wide element of D, preserving the bottom
 * halves.  Clobbers N.
 */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    /* Clamp to [0, half-width max]. */
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    /* Position in the top half; t still holds the low-half mask. */
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* d = (d & t) | (n & ~t). */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
6963
f7f2f0fa
RH
6964static const TCGOpcode sqshrunt_vec_list[] = {
6965 INDEX_op_shli_vec, INDEX_op_sari_vec,
6966 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6967};
6968static const GVecGen2i sqshrunt_ops[3] = {
6969 { .fniv = gen_sqshrunt_vec,
6970 .opt_opc = sqshrunt_vec_list,
6971 .load_dest = true,
6972 .fno = gen_helper_sve2_sqshrunt_h,
6973 .vece = MO_16 },
6974 { .fniv = gen_sqshrunt_vec,
6975 .opt_opc = sqshrunt_vec_list,
6976 .load_dest = true,
6977 .fno = gen_helper_sve2_sqshrunt_s,
6978 .vece = MO_32 },
6979 { .fniv = gen_sqshrunt_vec,
6980 .opt_opc = sqshrunt_vec_list,
6981 .load_dest = true,
6982 .fno = gen_helper_sve2_sqshrunt_d,
6983 .vece = MO_64 },
6984};
6985TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)
/*
 * SQRSHRUNB/SQRSHRUNT: rounding variants of SQSHRUN[BT];
 * out-of-line helpers only, no inline expansion.
 */
static const GVecGen2i sqrshrunb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunb_h },
    { .fno = gen_helper_sve2_sqrshrunb_s },
    { .fno = gen_helper_sve2_sqrshrunb_d },
};
TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)

static const GVecGen2i sqrshrunt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunt_h },
    { .fno = gen_helper_sve2_sqrshrunt_s },
    { .fno = gen_helper_sve2_sqrshrunt_d },
};
TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
c13418da 7000
743bb147
RH
7001static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
7002 TCGv_vec n, int64_t shr)
7003{
7004 TCGv_vec t = tcg_temp_new_vec_matching(d);
7005 int halfbits = 4 << vece;
7006 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7007 int64_t min = -max - 1;
7008
7009 tcg_gen_sari_vec(vece, n, n, shr);
7010 tcg_gen_dupi_vec(vece, t, min);
7011 tcg_gen_smax_vec(vece, n, n, t);
7012 tcg_gen_dupi_vec(vece, t, max);
7013 tcg_gen_smin_vec(vece, n, n, t);
7014 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7015 tcg_gen_and_vec(vece, d, n, t);
7016 tcg_temp_free_vec(t);
7017}
7018
f7f2f0fa
RH
7019static const TCGOpcode sqshrnb_vec_list[] = {
7020 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7021};
7022static const GVecGen2i sqshrnb_ops[3] = {
7023 { .fniv = gen_sqshrnb_vec,
7024 .opt_opc = sqshrnb_vec_list,
7025 .fno = gen_helper_sve2_sqshrnb_h,
7026 .vece = MO_16 },
7027 { .fniv = gen_sqshrnb_vec,
7028 .opt_opc = sqshrnb_vec_list,
7029 .fno = gen_helper_sve2_sqshrnb_s,
7030 .vece = MO_32 },
7031 { .fniv = gen_sqshrnb_vec,
7032 .opt_opc = sqshrnb_vec_list,
7033 .fno = gen_helper_sve2_sqshrnb_d,
7034 .vece = MO_64 },
7035};
7036TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
/*
 * SQSHRNT (vector expansion): as gen_sqshrnb_vec, but deposit into
 * the top half of each wide element of D, preserving the bottom
 * halves.  Clobbers N.
 */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);  /* signed half max */
    int64_t min = -max - 1;                          /* signed half min */

    tcg_gen_sari_vec(vece, n, n, shr);
    /* Signed saturate to [min, max]. */
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    /* Position in the top half, then merge with the low halves of d. */
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7056
f7f2f0fa
RH
7057static const TCGOpcode sqshrnt_vec_list[] = {
7058 INDEX_op_shli_vec, INDEX_op_sari_vec,
7059 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7060};
7061static const GVecGen2i sqshrnt_ops[3] = {
7062 { .fniv = gen_sqshrnt_vec,
7063 .opt_opc = sqshrnt_vec_list,
7064 .load_dest = true,
7065 .fno = gen_helper_sve2_sqshrnt_h,
7066 .vece = MO_16 },
7067 { .fniv = gen_sqshrnt_vec,
7068 .opt_opc = sqshrnt_vec_list,
7069 .load_dest = true,
7070 .fno = gen_helper_sve2_sqshrnt_s,
7071 .vece = MO_32 },
7072 { .fniv = gen_sqshrnt_vec,
7073 .opt_opc = sqshrnt_vec_list,
7074 .load_dest = true,
7075 .fno = gen_helper_sve2_sqshrnt_d,
7076 .vece = MO_64 },
7077};
7078TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)
/*
 * SQRSHRNB/SQRSHRNT: rounding variants of SQSHRN[BT];
 * out-of-line helpers only, no inline expansion.
 */
static const GVecGen2i sqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnb_h },
    { .fno = gen_helper_sve2_sqrshrnb_s },
    { .fno = gen_helper_sve2_sqrshrnb_d },
};
TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)

static const GVecGen2i sqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnt_h },
    { .fno = gen_helper_sve2_sqrshrnt_s },
    { .fno = gen_helper_sve2_sqrshrnt_d },
};
TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
743bb147 7093
c13418da
RH
7094static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
7095 TCGv_vec n, int64_t shr)
7096{
7097 TCGv_vec t = tcg_temp_new_vec_matching(d);
7098 int halfbits = 4 << vece;
7099
7100 tcg_gen_shri_vec(vece, n, n, shr);
7101 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7102 tcg_gen_umin_vec(vece, d, n, t);
7103 tcg_temp_free_vec(t);
7104}
7105
f7f2f0fa
RH
7106static const TCGOpcode uqshrnb_vec_list[] = {
7107 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7108};
7109static const GVecGen2i uqshrnb_ops[3] = {
7110 { .fniv = gen_uqshrnb_vec,
7111 .opt_opc = uqshrnb_vec_list,
7112 .fno = gen_helper_sve2_uqshrnb_h,
7113 .vece = MO_16 },
7114 { .fniv = gen_uqshrnb_vec,
7115 .opt_opc = uqshrnb_vec_list,
7116 .fno = gen_helper_sve2_uqshrnb_s,
7117 .vece = MO_32 },
7118 { .fniv = gen_uqshrnb_vec,
7119 .opt_opc = uqshrnb_vec_list,
7120 .fno = gen_helper_sve2_uqshrnb_d,
7121 .vece = MO_64 },
7122};
7123TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
c13418da
RH
7124
7125static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
7126 TCGv_vec n, int64_t shr)
7127{
7128 TCGv_vec t = tcg_temp_new_vec_matching(d);
7129 int halfbits = 4 << vece;
7130
7131 tcg_gen_shri_vec(vece, n, n, shr);
7132 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7133 tcg_gen_umin_vec(vece, n, n, t);
7134 tcg_gen_shli_vec(vece, n, n, halfbits);
7135 tcg_gen_bitsel_vec(vece, d, t, d, n);
7136 tcg_temp_free_vec(t);
7137}
7138
f7f2f0fa
RH
7139static const TCGOpcode uqshrnt_vec_list[] = {
7140 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7141};
7142static const GVecGen2i uqshrnt_ops[3] = {
7143 { .fniv = gen_uqshrnt_vec,
7144 .opt_opc = uqshrnt_vec_list,
7145 .load_dest = true,
7146 .fno = gen_helper_sve2_uqshrnt_h,
7147 .vece = MO_16 },
7148 { .fniv = gen_uqshrnt_vec,
7149 .opt_opc = uqshrnt_vec_list,
7150 .load_dest = true,
7151 .fno = gen_helper_sve2_uqshrnt_s,
7152 .vece = MO_32 },
7153 { .fniv = gen_uqshrnt_vec,
7154 .opt_opc = uqshrnt_vec_list,
7155 .load_dest = true,
7156 .fno = gen_helper_sve2_uqshrnt_d,
7157 .vece = MO_64 },
7158};
7159TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)
/*
 * UQRSHRNB/UQRSHRNT: rounding variants of UQSHRN[BT];
 * out-of-line helpers only, no inline expansion.
 */
static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)
/*
 * Expand a SVE2 narrowing three-operand insn via out-of-line helpers.
 * These insns have no byte-sized form, hence the NULL at index MO_8.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = { \
        NULL, gen_helper_sve2_##name##_h, \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
    }; \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
               name##_fns[a->esz], a, 0)

/* Add/subtract high-half narrowing, with and without rounding (R). */
DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
/* MATCH/NMATCH exist only for byte and halfword elements. */
static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
e0ae6ec3 7202
5880bdc0
RH
7203static gen_helper_gvec_4 * const histcnt_fns[4] = {
7204 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
7205};
46feb361
RH
7206TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
7207 histcnt_fns[a->esz], a, 0)
7d47ac94 7208
46feb361
RH
7209TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
7210 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7d47ac94 7211
7de2617b
RH
7212DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
7213DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
7214DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
7215DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
7216DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
/*
 * SVE Integer Multiply-Add (unpredicated)
 */

/* FMMLA: floating-point matrix multiply-accumulate (F32MM/F64MM). */
TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
4f26756b 7228
eeb4e84d
RH
7229static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
7230 NULL, gen_helper_sve2_sqdmlal_zzzw_h,
7231 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
7232};
7233TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7234 sqdmlal_zzzw_fns[a->esz], a, 0)
7235TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7236 sqdmlal_zzzw_fns[a->esz], a, 3)
7237TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7238 sqdmlal_zzzw_fns[a->esz], a, 2)
7239
7240static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
7241 NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
7242 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
7243};
7244TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7245 sqdmlsl_zzzw_fns[a->esz], a, 0)
7246TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7247 sqdmlsl_zzzw_fns[a->esz], a, 3)
7248TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7249 sqdmlsl_zzzw_fns[a->esz], a, 2)
7250
7251static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
7252 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
7253 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
7254};
7255TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7256 sqrdmlah_fns[a->esz], a, 0)
45a32e80 7257
eeb4e84d
RH
7258static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
7259 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
7260 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
7261};
7262TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7263 sqrdmlsh_fns[a->esz], a, 0)
45a32e80 7264
eeb4e84d
RH
7265static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
7266 NULL, gen_helper_sve2_smlal_zzzw_h,
7267 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
7268};
7269TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7270 smlal_zzzw_fns[a->esz], a, 0)
7271TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7272 smlal_zzzw_fns[a->esz], a, 1)
7273
7274static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
7275 NULL, gen_helper_sve2_umlal_zzzw_h,
7276 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
7277};
7278TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7279 umlal_zzzw_fns[a->esz], a, 0)
7280TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7281 umlal_zzzw_fns[a->esz], a, 1)
7282
7283static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
7284 NULL, gen_helper_sve2_smlsl_zzzw_h,
7285 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
7286};
7287TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7288 smlsl_zzzw_fns[a->esz], a, 0)
7289TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7290 smlsl_zzzw_fns[a->esz], a, 1)
7291
7292static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
7293 NULL, gen_helper_sve2_umlsl_zzzw_h,
7294 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
7295};
7296TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7297 umlsl_zzzw_fns[a->esz], a, 0)
7298TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7299 umlsl_zzzw_fns[a->esz], a, 1)
/*
 * Complex arithmetic: the rotation (a->rot) is passed as the
 * out-of-line helper's data argument.
 */
static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

/* CDOT: complex dot product; word and doubleword accumulators only. */
static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
/* USDOT: unsigned-by-signed dot product; only the word form exists. */
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)

/* SVE2-AES: mix-columns (decrypt flag selects inverse)... */
TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)

/* ...and the round functions, sharing one helper via the decrypt flag. */
TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, false)
TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, true)

/* SVE2-SM4 crypto. */
TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4ekey, a, 0)

/* SVE2-SHA3 rotate-and-xor. */
TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
                        gen_gvec_rax1, a)
/* FP narrowing conversions writing the top (odd) destination halves. */
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

/* FP widening conversions reading the top (odd) source halves. */
TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)

/* FCVTX[NT]: double-to-single conversion using round-to-odd. */
TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve2_fcvtnt_ds)

/* FLOGB: no byte form; half-precision uses the FP16 status flags. */
static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL,               gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
/*
 * FMLAL/FMLSL (vectors): widening fp16-to-fp32 multiply add/subtract
 * long.  sub and sel (top/bottom half selection) are packed into the
 * helper's data argument as (sel << 1) | sub.
 */
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)
/*
 * FMLAL/FMLSL (indexed): as do_FMLAL_zzzw, with the element index
 * packed above sel/sub in the helper's data argument.
 */
static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 2) | (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
/* Int8 matrix multiply-accumulate (I8MM). */
TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)

/* BFloat16 dot product, vector and indexed forms. */
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

/* BFloat16 matrix multiply-accumulate. */
TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)
/*
 * BFMLAL (vectors): widening bf16-to-fp32 multiply-add long;
 * sel chooses the bottom/top source halves.
 */
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)
/*
 * BFMLAL (indexed): as do_BFMLAL_zzzw, with the element index packed
 * above sel in the helper's data argument.
 */
static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
/*
 * PSEL (SME): predicate select.  Test the predicate element of Pm
 * indexed by (Wv + imm) mod elements; if true, Pd = Pn, else Pd = 0.
 */
static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
    int vl = vec_full_reg_size(s);
    int pl = pred_gvec_reg_size(s);
    int elements = vl >> a->esz;   /* number of predicate elements */
    TCGv_i64 tmp, didx, dbit;
    TCGv_ptr ptr;

    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    dbit = tcg_temp_new_i64();
    didx = tcg_temp_new_i64();
    ptr = tcg_temp_new_ptr();

    /* Compute the predicate element. */
    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
    if (is_power_of_2(elements)) {
        tcg_gen_andi_i64(tmp, tmp, elements - 1);
    } else {
        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
    }

    /* Extract the predicate byte and bit indices. */
    tcg_gen_shli_i64(tmp, tmp, a->esz);   /* bit offset into Pm */
    tcg_gen_andi_i64(dbit, tmp, 7);       /* bit within the byte */
    tcg_gen_shri_i64(didx, tmp, 3);       /* byte within the register */
    if (HOST_BIG_ENDIAN) {
        /* Bytes within each 64-bit predicate word are swapped on BE hosts. */
        tcg_gen_xori_i64(didx, didx, 7);
    }

    /* Load the predicate word. */
    tcg_gen_trunc_i64_ptr(ptr, didx);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);
    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));

    /* Extract the predicate bit and replicate to MO_64. */
    tcg_gen_shr_i64(tmp, tmp, dbit);
    tcg_gen_andi_i64(tmp, tmp, 1);
    tcg_gen_neg_i64(tmp, tmp);            /* 0 -> 0, 1 -> all-ones */

    /* Apply to either copy the source, or write zeros. */
    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);

    tcg_temp_free_i64(tmp);
    tcg_temp_free_i64(dbit);
    tcg_temp_free_i64(didx);
    tcg_temp_free_ptr(ptr);
    return true;
}