]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Use TRANS_FEAT for FTMAD
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
38388f7e 33#include "translate-a64.h"
cc48affe 34#include "fpu/softfloat.h"
38388f7e 35
757f9cff 36
9ee3a611
RH
37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
38cadeba
RH
40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
757f9cff
RH
42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
c4e7c493 45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 48
ccd841c3
RH
49/*
50 * Helpers for extracting complex instruction fields.
51 */
52
53/* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
451e4ffd 56static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
57{
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60}
61
451e4ffd 62static int tszimm_shr(DisasContext *s, int x)
ccd841c3 63{
451e4ffd 64 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
65}
66
67/* See e.g. LSL (immediate, predicated). */
451e4ffd 68static int tszimm_shl(DisasContext *s, int x)
ccd841c3 69{
451e4ffd 70 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
71}
72
f25a2361 73/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 74static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
75{
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
77}
78
451e4ffd 79static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
80{
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
82}
83
c4e7c493
RH
84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
86 */
451e4ffd 87static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
88{
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
91}
92
38388f7e
RH
93/*
94 * Include the generated decoder.
95 */
96
139c1837 97#include "decode-sve.c.inc"
38388f7e
RH
98
99/*
100 * Implement all of the translator functions referenced by the decoder.
101 */
102
d1822297
RH
103/* Return the offset info CPUARMState of the predicate vector register Pn.
104 * Note for this purpose, FFR is P16.
105 */
106static inline int pred_full_reg_offset(DisasContext *s, int regno)
107{
108 return offsetof(CPUARMState, vfp.pregs[regno]);
109}
110
111/* Return the byte size of the whole predicate register, VL / 64. */
112static inline int pred_full_reg_size(DisasContext *s)
113{
114 return s->sve_len >> 3;
115}
116
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    }
    /* Round up to a multiple of 16, i.e. QEMU_ALIGN_UP(size, 16). */
    return (size + 15) & ~15;
}
133
134static int pred_gvec_reg_size(DisasContext *s)
135{
136 return size_for_gvec(pred_full_reg_size(s));
137}
138
40e32e5a 139/* Invoke an out-of-line helper on 2 Zregs. */
c5edf07d 140static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
40e32e5a
RH
141 int rd, int rn, int data)
142{
c5edf07d
RH
143 if (fn == NULL) {
144 return false;
145 }
146 if (sve_access_check(s)) {
147 unsigned vsz = vec_full_reg_size(s);
148 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
149 vec_full_reg_offset(s, rn),
150 vsz, vsz, data, fn);
151 }
152 return true;
40e32e5a
RH
153}
154
e645d1a1 155/* Invoke an out-of-line helper on 3 Zregs. */
913a8a00 156static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
e645d1a1
RH
157 int rd, int rn, int rm, int data)
158{
913a8a00
RH
159 if (fn == NULL) {
160 return false;
161 }
162 if (sve_access_check(s)) {
163 unsigned vsz = vec_full_reg_size(s);
164 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
165 vec_full_reg_offset(s, rn),
166 vec_full_reg_offset(s, rm),
167 vsz, vsz, data, fn);
168 }
169 return true;
e645d1a1
RH
170}
171
84a272f5
RH
172static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
173 arg_rrr_esz *a, int data)
174{
175 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
176}
177
532724e4
RH
178/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
179static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
180 int rd, int rn, int rm,
181 int data, ARMFPStatusFlavour flavour)
182{
183 if (fn == NULL) {
184 return false;
185 }
186 if (sve_access_check(s)) {
187 unsigned vsz = vec_full_reg_size(s);
188 TCGv_ptr status = fpstatus_ptr(flavour);
189
190 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
191 vec_full_reg_offset(s, rn),
192 vec_full_reg_offset(s, rm),
193 status, vsz, vsz, data, fn);
194
195 tcg_temp_free_ptr(status);
196 }
197 return true;
198}
199
200static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
201 arg_rrr_esz *a, int data)
202{
203 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
204 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
205}
206
38650638 207/* Invoke an out-of-line helper on 4 Zregs. */
7ad416b1 208static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
38650638
RH
209 int rd, int rn, int rm, int ra, int data)
210{
7ad416b1
RH
211 if (fn == NULL) {
212 return false;
213 }
214 if (sve_access_check(s)) {
215 unsigned vsz = vec_full_reg_size(s);
216 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
217 vec_full_reg_offset(s, rn),
218 vec_full_reg_offset(s, rm),
219 vec_full_reg_offset(s, ra),
220 vsz, vsz, data, fn);
221 }
222 return true;
38650638
RH
223}
224
cab79ac9
RH
225static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
226 arg_rrrr_esz *a, int data)
227{
228 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
229}
230
e82d3536
RH
231static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
232 arg_rrxr_esz *a)
233{
234 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
235}
236
41bf9b67
RH
237/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
238static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
239 int rd, int rn, int rm, int ra,
240 int data, TCGv_ptr ptr)
241{
242 if (fn == NULL) {
243 return false;
244 }
245 if (sve_access_check(s)) {
246 unsigned vsz = vec_full_reg_size(s);
247 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
248 vec_full_reg_offset(s, rn),
249 vec_full_reg_offset(s, rm),
250 vec_full_reg_offset(s, ra),
251 ptr, vsz, vsz, data, fn);
252 }
253 return true;
254}
255
256static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
257 int rd, int rn, int rm, int ra,
258 int data, ARMFPStatusFlavour flavour)
259{
260 TCGv_ptr status = fpstatus_ptr(flavour);
261 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
262 tcg_temp_free_ptr(status);
263 return ret;
264}
265
96a461f7 266/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
8fb27a21 267static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
96a461f7
RH
268 int rd, int rn, int pg, int data)
269{
8fb27a21
RH
270 if (fn == NULL) {
271 return false;
272 }
273 if (sve_access_check(s)) {
274 unsigned vsz = vec_full_reg_size(s);
275 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
276 vec_full_reg_offset(s, rn),
277 pred_full_reg_offset(s, pg),
278 vsz, vsz, data, fn);
279 }
280 return true;
96a461f7
RH
281}
282
b051809a
RH
283static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
284 arg_rpr_esz *a, int data)
285{
286 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
287}
288
afa2529c
RH
289static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
290 arg_rpri_esz *a)
291{
292 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
293}
b051809a 294
36cbb7a8 295/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
2a753d1e 296static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
36cbb7a8
RH
297 int rd, int rn, int rm, int pg, int data)
298{
2a753d1e
RH
299 if (fn == NULL) {
300 return false;
301 }
302 if (sve_access_check(s)) {
303 unsigned vsz = vec_full_reg_size(s);
304 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
305 vec_full_reg_offset(s, rn),
306 vec_full_reg_offset(s, rm),
307 pred_full_reg_offset(s, pg),
308 vsz, vsz, data, fn);
309 }
310 return true;
36cbb7a8 311}
f7d79c41 312
312016c9
RH
313static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
314 arg_rprr_esz *a, int data)
315{
316 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
317}
318
faf915e2
RH
319/* Invoke a vector expander on two Zregs and an immediate. */
320static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
321 int esz, int rd, int rn, uint64_t imm)
322{
323 if (gvec_fn == NULL) {
324 return false;
325 }
326 if (sve_access_check(s)) {
327 unsigned vsz = vec_full_reg_size(s);
328 gvec_fn(esz, vec_full_reg_offset(s, rd),
329 vec_full_reg_offset(s, rn), imm, vsz, vsz);
330 }
331 return true;
332}
333
ada378f0
RH
334static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
335 arg_rri_esz *a)
336{
337 if (a->esz < 0) {
338 /* Invalid tsz encoding -- see tszimm_esz. */
339 return false;
340 }
341 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
342}
343
39eea561 344/* Invoke a vector expander on three Zregs. */
50f6db5f 345static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
28c4da31 346 int esz, int rd, int rn, int rm)
38388f7e 347{
50f6db5f
RH
348 if (gvec_fn == NULL) {
349 return false;
350 }
351 if (sve_access_check(s)) {
352 unsigned vsz = vec_full_reg_size(s);
353 gvec_fn(esz, vec_full_reg_offset(s, rd),
354 vec_full_reg_offset(s, rn),
355 vec_full_reg_offset(s, rm), vsz, vsz);
356 }
357 return true;
38388f7e
RH
358}
359
cd54bbe6
RH
360static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
361 arg_rrr_esz *a)
362{
363 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
364}
365
911cdc6d 366/* Invoke a vector expander on four Zregs. */
189876af
RH
367static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
368 arg_rrrr_esz *a)
911cdc6d 369{
189876af
RH
370 if (gvec_fn == NULL) {
371 return false;
372 }
373 if (sve_access_check(s)) {
374 unsigned vsz = vec_full_reg_size(s);
375 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
376 vec_full_reg_offset(s, a->rn),
377 vec_full_reg_offset(s, a->rm),
378 vec_full_reg_offset(s, a->ra), vsz, vsz);
379 }
380 return true;
911cdc6d
RH
381}
382
39eea561
RH
383/* Invoke a vector move on two Zregs. */
384static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 385{
f7d79c41 386 if (sve_access_check(s)) {
5f730621
RH
387 unsigned vsz = vec_full_reg_size(s);
388 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
389 vec_full_reg_offset(s, rn), vsz, vsz);
f7d79c41
RH
390 }
391 return true;
38388f7e
RH
392}
393
d9d78dcc
RH
394/* Initialize a Zreg with replications of a 64-bit immediate. */
395static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
396{
397 unsigned vsz = vec_full_reg_size(s);
8711e71f 398 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
d9d78dcc
RH
399}
400
516e246a 401/* Invoke a vector expander on three Pregs. */
23e5fa5f 402static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
dd81a8d7 403 int rd, int rn, int rm)
516e246a 404{
23e5fa5f
RH
405 if (sve_access_check(s)) {
406 unsigned psz = pred_gvec_reg_size(s);
407 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
408 pred_full_reg_offset(s, rn),
409 pred_full_reg_offset(s, rm), psz, psz);
410 }
411 return true;
516e246a
RH
412}
413
414/* Invoke a vector move on two Pregs. */
415static bool do_mov_p(DisasContext *s, int rd, int rn)
416{
d0b2df5a
RH
417 if (sve_access_check(s)) {
418 unsigned psz = pred_gvec_reg_size(s);
419 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
420 pred_full_reg_offset(s, rn), psz, psz);
421 }
422 return true;
516e246a
RH
423}
424
9e18d7a6
RH
425/* Set the cpu flags as per a return from an SVE helper. */
426static void do_pred_flags(TCGv_i32 t)
427{
428 tcg_gen_mov_i32(cpu_NF, t);
429 tcg_gen_andi_i32(cpu_ZF, t, 2);
430 tcg_gen_andi_i32(cpu_CF, t, 1);
431 tcg_gen_movi_i32(cpu_VF, 0);
432}
433
434/* Subroutines computing the ARM PredTest psuedofunction. */
435static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
436{
437 TCGv_i32 t = tcg_temp_new_i32();
438
439 gen_helper_sve_predtest1(t, d, g);
440 do_pred_flags(t);
441 tcg_temp_free_i32(t);
442}
443
444static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
445{
446 TCGv_ptr dptr = tcg_temp_new_ptr();
447 TCGv_ptr gptr = tcg_temp_new_ptr();
392acacc 448 TCGv_i32 t = tcg_temp_new_i32();
9e18d7a6
RH
449
450 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
451 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
9e18d7a6 452
392acacc 453 gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
9e18d7a6
RH
454 tcg_temp_free_ptr(dptr);
455 tcg_temp_free_ptr(gptr);
456
457 do_pred_flags(t);
458 tcg_temp_free_i32(t);
459}
460
028e2a7b
RH
461/* For each element size, the bits within a predicate word that are active. */
462const uint64_t pred_esz_masks[4] = {
463 0xffffffffffffffffull, 0x5555555555555555ull,
464 0x1111111111111111ull, 0x0101010101010101ull
465};
466
c437c59b
RH
467static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
468{
469 unallocated_encoding(s);
470 return true;
471}
472
39eea561
RH
473/*
474 *** SVE Logical - Unpredicated Group
475 */
476
b262215b
RH
477TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
478TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
479TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
480TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
d1822297 481
e6eba6e5
RH
482static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
483{
484 TCGv_i64 t = tcg_temp_new_i64();
485 uint64_t mask = dup_const(MO_8, 0xff >> sh);
486
487 tcg_gen_xor_i64(t, n, m);
488 tcg_gen_shri_i64(d, t, sh);
489 tcg_gen_shli_i64(t, t, 8 - sh);
490 tcg_gen_andi_i64(d, d, mask);
491 tcg_gen_andi_i64(t, t, ~mask);
492 tcg_gen_or_i64(d, d, t);
493 tcg_temp_free_i64(t);
494}
495
496static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
497{
498 TCGv_i64 t = tcg_temp_new_i64();
499 uint64_t mask = dup_const(MO_16, 0xffff >> sh);
500
501 tcg_gen_xor_i64(t, n, m);
502 tcg_gen_shri_i64(d, t, sh);
503 tcg_gen_shli_i64(t, t, 16 - sh);
504 tcg_gen_andi_i64(d, d, mask);
505 tcg_gen_andi_i64(t, t, ~mask);
506 tcg_gen_or_i64(d, d, t);
507 tcg_temp_free_i64(t);
508}
509
510static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
511{
512 tcg_gen_xor_i32(d, n, m);
513 tcg_gen_rotri_i32(d, d, sh);
514}
515
516static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
517{
518 tcg_gen_xor_i64(d, n, m);
519 tcg_gen_rotri_i64(d, d, sh);
520}
521
522static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
523 TCGv_vec m, int64_t sh)
524{
525 tcg_gen_xor_vec(vece, d, n, m);
526 tcg_gen_rotri_vec(vece, d, d, sh);
527}
528
529void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
530 uint32_t rm_ofs, int64_t shift,
531 uint32_t opr_sz, uint32_t max_sz)
532{
533 static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
534 static const GVecGen3i ops[4] = {
535 { .fni8 = gen_xar8_i64,
536 .fniv = gen_xar_vec,
537 .fno = gen_helper_sve2_xar_b,
538 .opt_opc = vecop,
539 .vece = MO_8 },
540 { .fni8 = gen_xar16_i64,
541 .fniv = gen_xar_vec,
542 .fno = gen_helper_sve2_xar_h,
543 .opt_opc = vecop,
544 .vece = MO_16 },
545 { .fni4 = gen_xar_i32,
546 .fniv = gen_xar_vec,
547 .fno = gen_helper_sve2_xar_s,
548 .opt_opc = vecop,
549 .vece = MO_32 },
550 { .fni8 = gen_xar_i64,
551 .fniv = gen_xar_vec,
552 .fno = gen_helper_gvec_xar_d,
553 .opt_opc = vecop,
554 .vece = MO_64 }
555 };
556 int esize = 8 << vece;
557
558 /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
559 tcg_debug_assert(shift >= 0);
560 tcg_debug_assert(shift <= esize);
561 shift &= esize - 1;
562
563 if (shift == 0) {
564 /* xar with no rotate devolves to xor. */
565 tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
566 } else {
567 tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
568 shift, &ops[vece]);
569 }
570}
571
572static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
573{
574 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
575 return false;
576 }
577 if (sve_access_check(s)) {
578 unsigned vsz = vec_full_reg_size(s);
579 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
580 vec_full_reg_offset(s, a->rn),
581 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
582 }
583 return true;
584}
585
911cdc6d
RH
586static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
587{
588 tcg_gen_xor_i64(d, n, m);
589 tcg_gen_xor_i64(d, d, k);
590}
591
592static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
593 TCGv_vec m, TCGv_vec k)
594{
595 tcg_gen_xor_vec(vece, d, n, m);
596 tcg_gen_xor_vec(vece, d, d, k);
597}
598
599static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
600 uint32_t a, uint32_t oprsz, uint32_t maxsz)
601{
602 static const GVecGen4 op = {
603 .fni8 = gen_eor3_i64,
604 .fniv = gen_eor3_vec,
605 .fno = gen_helper_sve2_eor3,
606 .vece = MO_64,
607 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
608 };
609 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
610}
611
b773a5c8 612TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)
911cdc6d
RH
613
614static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
615{
616 tcg_gen_andc_i64(d, m, k);
617 tcg_gen_xor_i64(d, d, n);
618}
619
620static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
621 TCGv_vec m, TCGv_vec k)
622{
623 tcg_gen_andc_vec(vece, d, m, k);
624 tcg_gen_xor_vec(vece, d, d, n);
625}
626
627static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
628 uint32_t a, uint32_t oprsz, uint32_t maxsz)
629{
630 static const GVecGen4 op = {
631 .fni8 = gen_bcax_i64,
632 .fniv = gen_bcax_vec,
633 .fno = gen_helper_sve2_bcax,
634 .vece = MO_64,
635 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
636 };
637 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
638}
639
b773a5c8 640TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)
911cdc6d
RH
641
642static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
643 uint32_t a, uint32_t oprsz, uint32_t maxsz)
644{
645 /* BSL differs from the generic bitsel in argument ordering. */
646 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
647}
648
b773a5c8 649TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
911cdc6d
RH
650
651static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
652{
653 tcg_gen_andc_i64(n, k, n);
654 tcg_gen_andc_i64(m, m, k);
655 tcg_gen_or_i64(d, n, m);
656}
657
658static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
659 TCGv_vec m, TCGv_vec k)
660{
661 if (TCG_TARGET_HAS_bitsel_vec) {
662 tcg_gen_not_vec(vece, n, n);
663 tcg_gen_bitsel_vec(vece, d, k, n, m);
664 } else {
665 tcg_gen_andc_vec(vece, n, k, n);
666 tcg_gen_andc_vec(vece, m, m, k);
667 tcg_gen_or_vec(vece, d, n, m);
668 }
669}
670
671static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
672 uint32_t a, uint32_t oprsz, uint32_t maxsz)
673{
674 static const GVecGen4 op = {
675 .fni8 = gen_bsl1n_i64,
676 .fniv = gen_bsl1n_vec,
677 .fno = gen_helper_sve2_bsl1n,
678 .vece = MO_64,
679 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
680 };
681 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
682}
683
b773a5c8 684TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
911cdc6d
RH
685
686static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
687{
688 /*
689 * Z[dn] = (n & k) | (~m & ~k)
690 * = | ~(m | k)
691 */
692 tcg_gen_and_i64(n, n, k);
693 if (TCG_TARGET_HAS_orc_i64) {
694 tcg_gen_or_i64(m, m, k);
695 tcg_gen_orc_i64(d, n, m);
696 } else {
697 tcg_gen_nor_i64(m, m, k);
698 tcg_gen_or_i64(d, n, m);
699 }
700}
701
702static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
703 TCGv_vec m, TCGv_vec k)
704{
705 if (TCG_TARGET_HAS_bitsel_vec) {
706 tcg_gen_not_vec(vece, m, m);
707 tcg_gen_bitsel_vec(vece, d, k, n, m);
708 } else {
709 tcg_gen_and_vec(vece, n, n, k);
710 tcg_gen_or_vec(vece, m, m, k);
711 tcg_gen_orc_vec(vece, d, n, m);
712 }
713}
714
715static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
716 uint32_t a, uint32_t oprsz, uint32_t maxsz)
717{
718 static const GVecGen4 op = {
719 .fni8 = gen_bsl2n_i64,
720 .fniv = gen_bsl2n_vec,
721 .fno = gen_helper_sve2_bsl2n,
722 .vece = MO_64,
723 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
724 };
725 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
726}
727
b773a5c8 728TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
911cdc6d
RH
729
730static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
731{
732 tcg_gen_and_i64(n, n, k);
733 tcg_gen_andc_i64(m, m, k);
734 tcg_gen_nor_i64(d, n, m);
735}
736
737static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
738 TCGv_vec m, TCGv_vec k)
739{
740 tcg_gen_bitsel_vec(vece, d, k, n, m);
741 tcg_gen_not_vec(vece, d, d);
742}
743
744static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
745 uint32_t a, uint32_t oprsz, uint32_t maxsz)
746{
747 static const GVecGen4 op = {
748 .fni8 = gen_nbsl_i64,
749 .fniv = gen_nbsl_vec,
750 .fno = gen_helper_sve2_nbsl,
751 .vece = MO_64,
752 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
753 };
754 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
755}
756
b773a5c8 757TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
911cdc6d 758
fea98f9c
RH
759/*
760 *** SVE Integer Arithmetic - Unpredicated Group
761 */
762
b262215b
RH
763TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
764TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
765TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
766TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
767TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
768TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
fea98f9c 769
f97cfd59
RH
770/*
771 *** SVE Integer Arithmetic - Binary Predicated Group
772 */
773
a2103582
RH
774/* Select active elememnts from Zn and inactive elements from Zm,
775 * storing the result in Zd.
776 */
68cc4ee3 777static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
a2103582
RH
778{
779 static gen_helper_gvec_4 * const fns[4] = {
780 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
781 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
782 };
68cc4ee3 783 return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
a2103582
RH
784}
785
8e7acb24
RH
786#define DO_ZPZZ(NAME, FEAT, name) \
787 static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \
788 gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \
789 gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \
f97cfd59 790 }; \
8e7acb24
RH
791 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \
792 name##_zpzz_fns[a->esz], a, 0)
793
794DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
795DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
796DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
797DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)
798
799DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
800DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)
801
802DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
803DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
804DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
805DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
806DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
807DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)
808
809DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
810DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
811DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)
812
813DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
814DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
815DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)
816
817static gen_helper_gvec_4 * const sdiv_fns[4] = {
818 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
819};
820TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)
f97cfd59 821
8e7acb24
RH
822static gen_helper_gvec_4 * const udiv_fns[4] = {
823 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
824};
825TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)
f97cfd59 826
29693f5f 827TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
d3fe4a29 828
afac6d04
RH
829/*
830 *** SVE Integer Arithmetic - Unary Predicated Group
831 */
832
817bd5c9
RH
833#define DO_ZPZ(NAME, FEAT, name) \
834 static gen_helper_gvec_3 * const name##_fns[4] = { \
835 gen_helper_##name##_b, gen_helper_##name##_h, \
836 gen_helper_##name##_s, gen_helper_##name##_d, \
afac6d04 837 }; \
817bd5c9
RH
838 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)
839
840DO_ZPZ(CLS, aa64_sve, sve_cls)
841DO_ZPZ(CLZ, aa64_sve, sve_clz)
842DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
843DO_ZPZ(CNOT, aa64_sve, sve_cnot)
844DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
845DO_ZPZ(ABS, aa64_sve, sve_abs)
846DO_ZPZ(NEG, aa64_sve, sve_neg)
847DO_ZPZ(RBIT, aa64_sve, sve_rbit)
848
849static gen_helper_gvec_3 * const fabs_fns[4] = {
850 NULL, gen_helper_sve_fabs_h,
851 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
852};
853TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
afac6d04 854
817bd5c9
RH
855static gen_helper_gvec_3 * const fneg_fns[4] = {
856 NULL, gen_helper_sve_fneg_h,
857 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
858};
859TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
afac6d04 860
817bd5c9
RH
861static gen_helper_gvec_3 * const sxtb_fns[4] = {
862 NULL, gen_helper_sve_sxtb_h,
863 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
864};
865TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)
afac6d04 866
817bd5c9
RH
867static gen_helper_gvec_3 * const uxtb_fns[4] = {
868 NULL, gen_helper_sve_uxtb_h,
869 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
870};
871TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)
afac6d04 872
817bd5c9
RH
873static gen_helper_gvec_3 * const sxth_fns[4] = {
874 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
875};
876TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)
afac6d04 877
817bd5c9
RH
878static gen_helper_gvec_3 * const uxth_fns[4] = {
879 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
880};
881TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)
afac6d04 882
817bd5c9
RH
883TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
884 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
885TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
886 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
afac6d04 887
047cec97
RH
888/*
889 *** SVE Integer Reduction Group
890 */
891
892typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
893static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
894 gen_helper_gvec_reduc *fn)
895{
896 unsigned vsz = vec_full_reg_size(s);
897 TCGv_ptr t_zn, t_pg;
898 TCGv_i32 desc;
899 TCGv_i64 temp;
900
901 if (fn == NULL) {
902 return false;
903 }
904 if (!sve_access_check(s)) {
905 return true;
906 }
907
c6a59b55 908 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
047cec97
RH
909 temp = tcg_temp_new_i64();
910 t_zn = tcg_temp_new_ptr();
911 t_pg = tcg_temp_new_ptr();
912
913 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
914 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
915 fn(temp, t_zn, t_pg, desc);
916 tcg_temp_free_ptr(t_zn);
917 tcg_temp_free_ptr(t_pg);
047cec97
RH
918
919 write_fp_dreg(s, a->rd, temp);
920 tcg_temp_free_i64(temp);
921 return true;
922}
923
924#define DO_VPZ(NAME, name) \
9ac24f1f 925 static gen_helper_gvec_reduc * const name##_fns[4] = { \
047cec97
RH
926 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
927 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
928 }; \
9ac24f1f 929 TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])
047cec97
RH
930
931DO_VPZ(ORV, orv)
932DO_VPZ(ANDV, andv)
933DO_VPZ(EORV, eorv)
934
935DO_VPZ(UADDV, uaddv)
936DO_VPZ(SMAXV, smaxv)
937DO_VPZ(UMAXV, umaxv)
938DO_VPZ(SMINV, sminv)
939DO_VPZ(UMINV, uminv)
940
9ac24f1f
RH
941static gen_helper_gvec_reduc * const saddv_fns[4] = {
942 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
943 gen_helper_sve_saddv_s, NULL
944};
945TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])
047cec97
RH
946
947#undef DO_VPZ
948
ccd841c3
RH
949/*
950 *** SVE Shift by Immediate - Predicated Group
951 */
952
60245996
RH
953/*
954 * Copy Zn into Zd, storing zeros into inactive elements.
955 * If invert, store zeros into the active elements.
ccd841c3 956 */
60245996
RH
957static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
958 int esz, bool invert)
ccd841c3 959{
60245996
RH
960 static gen_helper_gvec_3 * const fns[4] = {
961 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
962 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
ccd841c3 963 };
8fb27a21 964 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
ccd841c3
RH
965}
966
73c558a8
RH
967static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
968 gen_helper_gvec_3 * const fns[4])
ccd841c3 969{
73c558a8
RH
970 int max;
971
ccd841c3
RH
972 if (a->esz < 0) {
973 /* Invalid tsz encoding -- see tszimm_esz. */
974 return false;
975 }
73c558a8
RH
976
977 /*
978 * Shift by element size is architecturally valid.
979 * For arithmetic right-shift, it's the same as by one less.
980 * For logical shifts and ASRD, it is a zeroing operation.
981 */
982 max = 8 << a->esz;
983 if (a->imm >= max) {
984 if (asr) {
985 a->imm = max - 1;
986 } else {
987 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
988 }
989 }
afa2529c 990 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
ccd841c3
RH
991}
992
5cccd1f1
RH
/* Predicated shift by immediate: helper tables per element size. */
static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

/* ASRD: arithmetic shift right for divide (rounds toward zero). */
static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)
ccd841c3 1016
4df37e41
RH
/*
 * SVE2 saturating/rounding shifts by immediate, predicated.
 * A NULL entry (from esz < 0, an invalid tsz encoding) makes the
 * TRANS_FEAT expansion reject the instruction.
 */
static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
a5421b54 1051
fe7f8dfb
RH
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/*
 * Shift each element of Zn by the wide (64-bit) counts in Zm,
 * predicated.  There is no d-sized variant; the table NULL makes
 * TRANS_FEAT reject that encoding.
 */
#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
        gen_helper_sve_##name##_zpzw_s, NULL                          \
    };                                                                \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,          \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1069
d9d78dcc
RH
1070/*
1071 *** SVE Bitwise Shift - Unpredicated Group
1072 */
1073
/*
 * Unpredicated shift by immediate: Zd = gvec_fn(Zn, #imm).
 * Returns false only for an invalid tsz encoding.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                /* Logical shift by the full width zeroes the vector. */
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
1100
5e612f80
RH
TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

/* Unpredicated shift by wide (64-bit) counts in Zm; no d-sized variant. */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {            \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
        gen_helper_sve_##name##_zzw_s, NULL                           \
    };                                                                \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                  \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1118
96a36e4a
RH
1119/*
1120 *** SVE Integer Multiply-Add Group
1121 */
1122
/* Expand rd = fn(ra, rn, rm, pg) via an out-of-line 5-operand helper. */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
1137
dc67e645
RH
/* MLA/MLS: predicated integer multiply-add/subtract helper tables. */
static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
96a36e4a 1149
9a56c9c3
RH
1150/*
1151 *** SVE Index Generation Group
1152 */
1153
/*
 * INDEX: fill Zd with start, start+incr, start+2*incr, ...
 * The d-sized helper takes 64-bit operands directly; smaller element
 * sizes truncate start/incr to 32 bits first.
 */
static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* Narrow the 64-bit start/increment to the helper's i32 args. */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    return true;
}
1192
9aa60c83
RH
/* INDEX variants: immediate/register start x immediate/register step. */
TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
9a56c9c3 1201
96f922cc
RH
1202/*
1203 *** SVE Stack Allocation Group
1204 */
1205
3a7be554 1206static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 1207{
5de56742
AC
1208 if (sve_access_check(s)) {
1209 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1210 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1211 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1212 }
96f922cc
RH
1213 return true;
1214}
1215
3a7be554 1216static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 1217{
5de56742
AC
1218 if (sve_access_check(s)) {
1219 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1220 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1221 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1222 }
96f922cc
RH
1223 return true;
1224}
1225
3a7be554 1226static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 1227{
5de56742
AC
1228 if (sve_access_check(s)) {
1229 TCGv_i64 reg = cpu_reg(s, a->rd);
1230 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1231 }
96f922cc
RH
1232 return true;
1233}
1234
4b242d9c
RH
1235/*
1236 *** SVE Compute Vector Address Group
1237 */
1238
/* ADR: vector address computation; imm selects the index shift amount. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}
1243
dcba3d67
RH
/* ADR: packed 32/64-bit bases, and 32-bit sign/zero-extended offsets. */
TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
4b242d9c 1248
0762cd42
RH
1249/*
1250 *** SVE Integer Misc - Unpredicated Group
1251 */
1252
0ea3cdbf
RH
/* FEXPA: floating-point exponential accelerator; no byte variant. */
static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)

/* FTSSEL: floating-point trig select coefficient; no byte variant. */
static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
a1f233f2 1265
516e246a
RH
1266/*
1267 *** SVE Predicate Logical Operations Group
1268 */
1269
/*
 * Expand a predicate logical operation, optionally setting NZCV
 * (a->s).  The non-flag-setting form is a plain gvec expansion;
 * the flag-setting form must also run PTEST on the result.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        /* No flags: just the vector operation. */
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1325
/* Pd = Pn & Pm & Pg, scalar and vector inline expansions. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        /* Without flags, degenerate operand patterns simplify. */
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                /* Pd = Pn & Pn & Pn == move. */
                return do_mov_p(s, a->rd, a->rn);
            }
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            /* The guard is one of the operands: a single AND suffices. */
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}
1360
/* Pd = (Pn & ~Pm) & Pg, scalar and vector inline expansions. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* When the guard is also Pn, the final AND with Pg is redundant. */
    if (!a->s && a->pg == a->rn) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}
1388
/* Pd = (Pn ^ Pm) & Pg, scalar and vector inline expansions. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1417
/* SEL: Pd = Pg ? Pn : Pm, per bit; no flag-setting form exists. */
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}
1432
/* Pd = (Pn | Pm) & Pg, scalar and vector inline expansions. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* ORR Pd, Pg/Z, Pn, Pn with Pg == Pn is a plain move. */
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1460
/* Pd = (Pn | ~Pm) & Pg, scalar and vector inline expansions. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1484
/* Pd = ~(Pn | Pm) & Pg, scalar and vector inline expansions. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1508
/* Pd = ~(Pn & Pm) & Pg, scalar and vector inline expansions. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1532
9e18d7a6
RH
1533/*
1534 *** SVE Predicate Misc Group
1535 */
1536
/* PTEST: set NZCV from a test of Pn under governing predicate Pg. */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Single 64-bit word: test inline in temps. */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            /* Multi-word predicate: use the out-of-line test. */
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1560
028e2a7b
RH
/* See the ARM pseudocode DecodePredCount.
 * Returns the number of active elements for the given pattern, or 0
 * when the vector cannot satisfy the pattern (including #uimm5).
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    /* Fixed VLn patterns apply only when the vector is long enough. */
    return elements >= bound ? bound : 0;
}
1598
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Trim the final partially-set 64-bit word. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one store. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            /* All set words identical: use a gvec dup (zeroes the rest). */
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Store full words, then the partial word, then zero the remainder. */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1678
/* PTRUE/PTRUES: set predicate elements per pattern, optionally flags. */
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
028e2a7b 1686
/* RDFFR (predicated): Pd = FFR AND Pg, optionally setting flags. */
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}
1698
ff502658
RH
/* Unpredicated RDFFR/WRFFR are plain predicate moves to/from FFR. */
TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
028e2a7b
RH
1701
/*
 * PFIRST/PNEXT: update Pd in place via gen_fn and set NZCV from the
 * helper's return value.  The predicate descriptor carries the
 * predicate size and element size.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    /* The helper returns the new flags in t. */
    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
1730
d95040e3
RH
/* PFIRST/PNEXT: find the first/next active element; sets NZCV. */
TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
028e2a7b 1733
24e82e68
RH
1734/*
1735 *** SVE Element Count Group
1736 */
1737
/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtract and clamp to the 32-bit minimum. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        /* Add and clamp to the 32-bit maximum. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
1762
/* Similarly with 64-bit values.  Without a wider type to borrow,
 * overflow must be detected explicitly and the result clamped with
 * movcond.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: clamp to 0 when reg < val. */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: clamp to UINT64_MAX when the sum wraps. */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1808
/* Similarly with a vector and a scalar operand.  The per-element
 * saturating add/subtract is done out of line; decrement is handled
 * by negating the scalar (except for unsigned 64-bit, which has a
 * dedicated subtract helper).
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                /* Unsigned 64-bit decrement cannot negate; use subtract. */
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
1892
3a7be554 1893static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
1894{
1895 if (sve_access_check(s)) {
1896 unsigned fullsz = vec_full_reg_size(s);
1897 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1898 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1899 }
1900 return true;
1901}
1902
3a7be554 1903static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1904{
1905 if (sve_access_check(s)) {
1906 unsigned fullsz = vec_full_reg_size(s);
1907 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1908 int inc = numelem * a->imm * (a->d ? -1 : 1);
1909 TCGv_i64 reg = cpu_reg(s, a->rd);
1910
1911 tcg_gen_addi_i64(reg, reg, inc);
1912 }
1913 return true;
1914}
1915
/* SQINC/SQDEC/UQINC/UQDEC (32-bit register): saturating increment or
 * decrement of Xd by a multiple of the element count.
 */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        /* Zero increment: just normalize the 32-bit value in Xd. */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
1939
3a7be554 1940static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1941{
1942 if (!sve_access_check(s)) {
1943 return true;
1944 }
1945
1946 unsigned fullsz = vec_full_reg_size(s);
1947 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1948 int inc = numelem * a->imm;
1949 TCGv_i64 reg = cpu_reg(s, a->rd);
1950
1951 if (inc != 0) {
d681f125 1952 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
24e82e68
RH
1953 }
1954 return true;
1955}
1956
/* INC/DEC (vector): non-saturating add of +/- count to each element.
 * No byte-sized encoding exists.
 */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        /* Zero count: Zd = Zn. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1979
/* SQINC/SQDEC/UQINC/UQDEC (vector): saturating per-element add/sub
 * of the element count.  No byte-sized encoding exists.
 */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        /* Zero count: Zd = Zn. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
2000
e1fa1164
RH
2001/*
2002 *** SVE Bitwise Immediate Group
2003 */
2004
/* Expand a bitwise operation with a logical-immediate bitmask,
 * rejecting invalid (dbm) encodings.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
}
2015
15a314da
RH
2016TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
2017TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
2018TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
e1fa1164 2019
3a7be554 2020static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
2021{
2022 uint64_t imm;
2023 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2024 extract32(a->dbm, 0, 6),
2025 extract32(a->dbm, 6, 6))) {
2026 return false;
2027 }
2028 if (sve_access_check(s)) {
2029 do_dupi_z(s, a->rd, imm);
2030 }
2031 return true;
2032}
2033
f25a2361
RH
2034/*
2035 *** SVE Integer Wide Immediate - Predicated Group
2036 */
2037
2038/* Implement all merging copies. This is used for CPY (immediate),
2039 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2040 */
/*
 * Emit a merging copy of VAL into Zd under predicate Pg, reading the
 * inactive elements from Zn.  ESZ selects the per-element-size helper.
 * Shared by CPY (immediate), FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    /* No extra descriptor data needed beyond the vector length.  */
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    /* Helpers take pointers into env for the registers involved.  */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
}
2065
3a7be554 2066static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361
RH
2067{
2068 if (a->esz == 0) {
2069 return false;
2070 }
2071 if (sve_access_check(s)) {
2072 /* Decode the VFP immediate. */
2073 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
e152b48b 2074 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
f25a2361
RH
2075 }
2076 return true;
2077}
2078
3a7be554 2079static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 2080{
f25a2361 2081 if (sve_access_check(s)) {
e152b48b 2082 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
f25a2361
RH
2083 }
2084 return true;
2085}
2086
3a7be554 2087static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
f25a2361
RH
2088{
2089 static gen_helper_gvec_2i * const fns[4] = {
2090 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2091 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2092 };
2093
f25a2361
RH
2094 if (sve_access_check(s)) {
2095 unsigned vsz = vec_full_reg_size(s);
f25a2361
RH
2096 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2097 pred_full_reg_offset(s, a->pg),
e152b48b
RH
2098 tcg_constant_i64(a->imm),
2099 vsz, vsz, 0, fns[a->esz]);
f25a2361
RH
2100 }
2101 return true;
2102}
2103
b94f8f60
RH
2104/*
2105 *** SVE Permute Extract Group
2106 */
2107
/*
 * EXT: extract a vector from the pair Zn:Zm starting at byte IMM.
 * An out-of-range IMM (>= vsz) is treated as 0, i.e. a copy of Zn.
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;           /* bytes taken from Zn */
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Low part of the result comes from the tail of Zn ...  */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            /* ... high part from the head of Zm.  */
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper for overlapping cases.  */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2137
c799c115
RH
2138TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
2139TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
75114792 2140
30562ab7
RH
2141/*
2142 *** SVE Permute - Unpredicated Group
2143 */
2144
3a7be554 2145static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2146{
2147 if (sve_access_check(s)) {
2148 unsigned vsz = vec_full_reg_size(s);
2149 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2150 vsz, vsz, cpu_reg_sp(s, a->rn));
2151 }
2152 return true;
2153}
2154
/*
 * DUP (indexed): replicate element [index] of Zn into every element of Zd.
 * The element size and index are jointly encoded in a->imm: the lowest
 * set bit selects the size, the remaining bits form the index.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        /* imm2:tsz == 0 is unallocated.  */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2181
/*
 * INSR common expansion: shift Zn up by one element and insert VAL
 * at element 0, via the per-element-size out-of-line helper.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    /* Pass pointers into env for destination and source vectors.  */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
}
2202
3a7be554 2203static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2204{
2205 if (sve_access_check(s)) {
2206 TCGv_i64 t = tcg_temp_new_i64();
2207 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2208 do_insr_i64(s, a, t);
2209 tcg_temp_free_i64(t);
2210 }
2211 return true;
2212}
2213
3a7be554 2214static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2215{
2216 if (sve_access_check(s)) {
2217 do_insr_i64(s, a, cpu_reg(s, a->rm));
2218 }
2219 return true;
2220}
2221
0ea3cdbf
RH
2222static gen_helper_gvec_2 * const rev_fns[4] = {
2223 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2224 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2225};
2226TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
30562ab7 2227
32e2ad65
RH
2228static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
2229 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2230 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2231};
2232TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
30562ab7 2233
5f425b92
RH
2234static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
2235 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2236 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2237};
2238TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
2239 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
80a712a2 2240
32e2ad65
RH
2241static gen_helper_gvec_3 * const tbx_fns[4] = {
2242 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2243 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2244};
2245TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
80a712a2 2246
/*
 * SUNPKLO/HI, UUNPKLO/HI: widen the low (h=0) or high (h=1) half of Zn
 * into full-width elements of Zd, signed (u=0) or unsigned (u=1).
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    /* Indexed by [esz][u]; byte elements cannot be unpacked.  */
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The source half is selected by offsetting into Zn.  */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2268
d731d8cb
RH
2269/*
2270 *** SVE Permute - Predicates Group
2271 */
2272
/*
 * Common expansion for three-operand predicate permutes (ZIP/UZP/TRN).
 * HIGH_ODD is passed through the descriptor DATA field to select the
 * high/odd variant in the helper.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    uint32_t desc = 0;

    /* Predicate sizes may be smaller than simd_desc allows; build a
     * PREDDESC descriptor by hand.
     */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));

    fn(t_d, t_n, t_m, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    return true;
}
2302
/*
 * Common expansion for two-operand predicate permutes (REV/PUNPK).
 * HIGH_ODD is passed through the descriptor DATA field.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Hand-built PREDDESC descriptor; see do_perm_pred3.  */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(t_d, t_n, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2328
bdb349f5
RH
2329TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
2330TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
2331TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
2332TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
2333TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
2334TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
d731d8cb 2335
1d0fce4b
RH
2336TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
2337TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
2338TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
d731d8cb 2339
234b48e9
RH
2340/*
2341 *** SVE Permute - Interleaving Group
2342 */
2343
a95b9618
RH
2344static gen_helper_gvec_3 * const zip_fns[4] = {
2345 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2346 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2347};
2348TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2349 zip_fns[a->esz], a, 0)
2350TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2351 zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
2352
2353TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2354 gen_helper_sve2_zip_q, a, 0)
2355TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2356 gen_helper_sve2_zip_q, a,
2357 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
74b64b25 2358
234b48e9
RH
2359static gen_helper_gvec_3 * const uzp_fns[4] = {
2360 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2361 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2362};
2363
32e2ad65
RH
2364TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2365 uzp_fns[a->esz], a, 0)
2366TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2367 uzp_fns[a->esz], a, 1 << a->esz)
234b48e9 2368
32e2ad65
RH
2369TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2370 gen_helper_sve2_uzp_q, a, 0)
2371TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2372 gen_helper_sve2_uzp_q, a, 16)
74b64b25 2373
234b48e9
RH
2374static gen_helper_gvec_3 * const trn_fns[4] = {
2375 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2376 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2377};
2378
32e2ad65
RH
2379TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2380 trn_fns[a->esz], a, 0)
2381TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2382 trn_fns[a->esz], a, 1 << a->esz)
234b48e9 2383
32e2ad65
RH
2384TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2385 gen_helper_sve2_trn_q, a, 0)
2386TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2387 gen_helper_sve2_trn_q, a, 16)
74b64b25 2388
3ca879ae
RH
2389/*
2390 *** SVE Permute Vector - Predicated Group
2391 */
2392
817bd5c9
RH
2393static gen_helper_gvec_3 * const compact_fns[4] = {
2394 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2395};
2396TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
3ca879ae 2397
ef23cb72
RH
2398/* Call the helper that computes the ARM LastActiveElement pseudocode
2399 * function, scaled by the element size. This includes the not found
2400 * indication; e.g. not found for esz=3 is -8.
2401 */
/* Compute the scaled offset of the last active element of Pg into RET;
 * a negative result (e.g. -(1 << esz)) indicates no active element.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_p);
}
2419
2420/* Increment LAST to the offset of the next element in the vector,
2421 * wrapping around to 0.
2422 */
/* Advance LAST by one element, wrapping around to offset 0 at the end
 * of the vector.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two vector size: wrap with a simple mask.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select 0 when the incremented offset reaches vsz.  */
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}
2436
2437/* If LAST < 0, set LAST to the offset of the last element in the vector. */
/* If LAST < 0 (no active element), set LAST to the offset of the last
 * element in the vector; otherwise leave it unchanged.
 */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* Masking maps the negative not-found value into range.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Select the final element's offset when LAST is negative.  */
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}
2450
2451/* Load an unsigned element of ESZ from BASE+OFS. */
/* Load an unsigned element of ESZ from BASE+OFS into a new i64 temp;
 * the caller owns (and must free) the returned temporary.
 */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
2474
2475/* Load an unsigned element of ESZ from RM[LAST]. */
/* Load an unsigned element of ESZ from RM[LAST].  LAST is clobbered on
 * big-endian hosts by the ordering adjustment.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2500
2501/* Compute CLAST for a Zreg. */
/* Compute CLAST for a Zreg: broadcast the last-active (or following)
 * element of Zm to all of Zd, falling back to a move from Zn when the
 * predicate has no active element.
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Local temp: the value must survive the branch below.  */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2548
db7fa5d8
RH
2549TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
2550TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
ef23cb72
RH
2551
2552/* Compute CLAST for a scalar. */
/* Compute CLAST for a scalar: replace REG_VAL with the selected element
 * of Zm, or leave it unchanged when no element is active.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    /* Keep the loaded element only if an element was found (cmp >= 0).  */
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
                        ele, reg_val);

    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2583
2584/* Compute CLAST for a Vreg. */
2585static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2586{
2587 if (sve_access_check(s)) {
2588 int esz = a->esz;
2589 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2590 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2591
2592 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2593 write_fp_dreg(s, a->rd, reg);
2594 tcg_temp_free_i64(reg);
2595 }
2596 return true;
2597}
2598
ac4fb247
RH
2599TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
2600TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
ef23cb72
RH
2601
2602/* Compute CLAST for a Xreg. */
/* Compute CLAST for a Xreg.  */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Zero-extend the current register value to the element width,
     * since the not-found case preserves (the truncated) Xd.
     */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        /* 64-bit elements need no extension.  */
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
2631
c673404a
RH
2632TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2633TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
ef23cb72
RH
2634
2635/* Compute LAST for a scalar. */
2636static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2637 int pg, int rm, bool before)
2638{
2639 TCGv_i32 last = tcg_temp_new_i32();
2640 TCGv_i64 ret;
2641
2642 find_last_active(s, last, esz, pg);
2643 if (before) {
2644 wrap_last_active(s, last, esz);
2645 } else {
2646 incr_last_active(s, last, esz);
2647 }
2648
2649 ret = load_last_active(s, last, rm, esz);
2650 tcg_temp_free_i32(last);
2651 return ret;
2652}
2653
2654/* Compute LAST for a Vreg. */
2655static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2656{
2657 if (sve_access_check(s)) {
2658 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2659 write_fp_dreg(s, a->rd, val);
2660 tcg_temp_free_i64(val);
2661 }
2662 return true;
2663}
2664
75de9fd4
RH
2665TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
2666TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
ef23cb72
RH
2667
2668/* Compute LAST for a Xreg. */
2669static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2670{
2671 if (sve_access_check(s)) {
2672 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2673 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2674 tcg_temp_free_i64(val);
2675 }
2676 return true;
2677}
2678
884c5a80
RH
2679TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
2680TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
ef23cb72 2681
3a7be554 2682static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2683{
2684 if (sve_access_check(s)) {
2685 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2686 }
2687 return true;
2688}
2689
3a7be554 2690static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2691{
2692 if (sve_access_check(s)) {
2693 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2694 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2695 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2696 tcg_temp_free_i64(t);
2697 }
2698 return true;
2699}
2700
817bd5c9
RH
2701static gen_helper_gvec_3 * const revb_fns[4] = {
2702 NULL, gen_helper_sve_revb_h,
2703 gen_helper_sve_revb_s, gen_helper_sve_revb_d,
2704};
2705TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
dae8fb90 2706
817bd5c9
RH
2707static gen_helper_gvec_3 * const revh_fns[4] = {
2708 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
2709};
2710TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
dae8fb90 2711
817bd5c9
RH
2712TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
2713 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
dae8fb90 2714
897ebd70
RH
2715TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
2716 gen_helper_sve_splice, a, a->esz)
b48ff240 2717
897ebd70
RH
2718TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
2719 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
75114792 2720
757f9cff
RH
2721/*
2722 *** SVE Integer Compare - Vectors Group
2723 */
2724
/*
 * Common expansion for flag-setting vector-vs-vector compares:
 * call the helper to produce Pd and the NZCV result, then update flags.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        /* Unallocated element size for this compare.  */
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    /* Commit the helper's NZCV result to the flag variables.  */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2763
2764#define DO_PPZZ(NAME, name) \
671bdb2e
RH
2765 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \
2766 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2767 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2768 }; \
2769 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \
2770 a, name##_ppzz_fns[a->esz])
757f9cff
RH
2771
2772DO_PPZZ(CMPEQ, cmpeq)
2773DO_PPZZ(CMPNE, cmpne)
2774DO_PPZZ(CMPGT, cmpgt)
2775DO_PPZZ(CMPGE, cmpge)
2776DO_PPZZ(CMPHI, cmphi)
2777DO_PPZZ(CMPHS, cmphs)
2778
2779#undef DO_PPZZ
2780
2781#define DO_PPZW(NAME, name) \
671bdb2e
RH
2782 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \
2783 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2784 gen_helper_sve_##name##_ppzw_s, NULL \
2785 }; \
2786 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \
2787 a, name##_ppzw_fns[a->esz])
757f9cff
RH
2788
2789DO_PPZW(CMPEQ, cmpeq)
2790DO_PPZW(CMPNE, cmpne)
2791DO_PPZW(CMPGT, cmpgt)
2792DO_PPZW(CMPGE, cmpge)
2793DO_PPZW(CMPHI, cmphi)
2794DO_PPZW(CMPHS, cmphs)
2795DO_PPZW(CMPLT, cmplt)
2796DO_PPZW(CMPLE, cmple)
2797DO_PPZW(CMPLO, cmplo)
2798DO_PPZW(CMPLS, cmpls)
2799
2800#undef DO_PPZW
2801
38cadeba
RH
2802/*
2803 *** SVE Integer Compare - Immediate Groups
2804 */
2805
/*
 * Common expansion for flag-setting vector-vs-immediate compares;
 * the immediate travels in the simd_desc data field.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        /* Unallocated element size for this compare.  */
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    /* Commit the helper's NZCV result to the flag variables.  */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2841
2842#define DO_PPZI(NAME, name) \
9c545be6 2843 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \
38cadeba
RH
2844 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2845 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2846 }; \
9c545be6
RH
2847 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \
2848 name##_ppzi_fns[a->esz])
38cadeba
RH
2849
2850DO_PPZI(CMPEQ, cmpeq)
2851DO_PPZI(CMPNE, cmpne)
2852DO_PPZI(CMPGT, cmpgt)
2853DO_PPZI(CMPGE, cmpge)
2854DO_PPZI(CMPHI, cmphi)
2855DO_PPZI(CMPHS, cmphs)
2856DO_PPZI(CMPLT, cmplt)
2857DO_PPZI(CMPLE, cmple)
2858DO_PPZI(CMPLO, cmplo)
2859DO_PPZI(CMPLS, cmpls)
2860
2861#undef DO_PPZI
2862
35da316f
RH
2863/*
2864 *** SVE Partition Break Group
2865 */
2866
/*
 * Common expansion for three-operand break insns (BRKPA/BRKPB),
 * selecting the flag-setting helper when a->s is set.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting form: helper also returns NZCV.  */
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, m, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, m, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    return true;
}
2902
/*
 * Common expansion for two-operand break insns (BRKA/BRKB/BRKN),
 * selecting the flag-setting helper when a->s is set.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting form: helper also returns NZCV.  */
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    return true;
}
2935
2224d24d
RH
2936TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
2937 gen_helper_sve_brkpa, gen_helper_sve_brkpas)
2938TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
2939 gen_helper_sve_brkpb, gen_helper_sve_brkpbs)
2940
2941TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
2942 gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
2943TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
2944 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)
2945
2946TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
2947 gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
2948TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
2949 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)
2950
2951TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
2952 gen_helper_sve_brkn, gen_helper_sve_brkns)
35da316f 2953
9ee3a611
RH
2954/*
2955 *** SVE Predicate Count Group
2956 */
2957
/*
 * Count into VAL the active elements of Pn governed by Pg, for element
 * size ESZ.  Small predicates are counted inline with a popcount; larger
 * ones go through the out-of-line helper.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        /* The whole predicate fits in one i64 load.  */
        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        /* Hand-built PREDDESC descriptor; see find_last_active.  */
        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}
2996
3a7be554 2997static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
2998{
2999 if (sve_access_check(s)) {
3000 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3001 }
3002 return true;
3003}
3004
3a7be554 3005static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3006{
3007 if (sve_access_check(s)) {
3008 TCGv_i64 reg = cpu_reg(s, a->rd);
3009 TCGv_i64 val = tcg_temp_new_i64();
3010
3011 do_cntp(s, val, a->esz, a->pg, a->pg);
3012 if (a->d) {
3013 tcg_gen_sub_i64(reg, reg, val);
3014 } else {
3015 tcg_gen_add_i64(reg, reg, val);
3016 }
3017 tcg_temp_free_i64(val);
3018 }
3019 return true;
3020}
3021
3a7be554 3022static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3023{
3024 if (a->esz == 0) {
3025 return false;
3026 }
3027 if (sve_access_check(s)) {
3028 unsigned vsz = vec_full_reg_size(s);
3029 TCGv_i64 val = tcg_temp_new_i64();
3030 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3031
3032 do_cntp(s, val, a->esz, a->pg, a->pg);
3033 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3034 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3035 }
3036 return true;
3037}
3038
3a7be554 3039static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3040{
3041 if (sve_access_check(s)) {
3042 TCGv_i64 reg = cpu_reg(s, a->rd);
3043 TCGv_i64 val = tcg_temp_new_i64();
3044
3045 do_cntp(s, val, a->esz, a->pg, a->pg);
3046 do_sat_addsub_32(reg, val, a->u, a->d);
3047 }
3048 return true;
3049}
3050
3a7be554 3051static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3052{
3053 if (sve_access_check(s)) {
3054 TCGv_i64 reg = cpu_reg(s, a->rd);
3055 TCGv_i64 val = tcg_temp_new_i64();
3056
3057 do_cntp(s, val, a->esz, a->pg, a->pg);
3058 do_sat_addsub_64(reg, val, a->u, a->d);
3059 }
3060 return true;
3061}
3062
3a7be554 3063static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3064{
3065 if (a->esz == 0) {
3066 return false;
3067 }
3068 if (sve_access_check(s)) {
3069 TCGv_i64 val = tcg_temp_new_i64();
3070 do_cntp(s, val, a->esz, a->pg, a->pg);
3071 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3072 }
3073 return true;
3074}
3075
caf1cefc
RH
3076/*
3077 *** SVE Integer Compare Scalars Group
3078 */
3079
3a7be554 3080static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3081{
3082 if (!sve_access_check(s)) {
3083 return true;
3084 }
3085
3086 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3087 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3088 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3089 TCGv_i64 cmp = tcg_temp_new_i64();
3090
3091 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3092 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3093 tcg_temp_free_i64(cmp);
3094
3095 /* VF = !NF & !CF. */
3096 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3097 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3098
3099 /* Both NF and VF actually look at bit 31. */
3100 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3101 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3102 return true;
3103}
3104
/*
 * WHILE{LE,LT,GE,GT}{,U}: construct a predicate from a scalar loop bound.
 * The condition is reduced to "how many iterations are true", which the
 * whilel/whileg helper converts into a predicate; flags are then set from
 * the resulting predicate (do_pred_flags).
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    /* For the 32-bit form, widen the operands per the signedness bit. */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        /* Less-than: iteration count is op1 - op0. */
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        /* Greater-than: iteration count is op0 - op1. */
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    /* Maximum count: the number of elements in the vector. */
    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length. This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
3215
14f6dad1
RH
3216static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3217{
3218 TCGv_i64 op0, op1, diff, t1, tmax;
4481bbf2 3219 TCGv_i32 t2;
14f6dad1
RH
3220 TCGv_ptr ptr;
3221 unsigned vsz = vec_full_reg_size(s);
3222 unsigned desc = 0;
3223
3224 if (!dc_isar_feature(aa64_sve2, s)) {
3225 return false;
3226 }
3227 if (!sve_access_check(s)) {
3228 return true;
3229 }
3230
3231 op0 = read_cpu_reg(s, a->rn, 1);
3232 op1 = read_cpu_reg(s, a->rm, 1);
3233
4481bbf2 3234 tmax = tcg_constant_i64(vsz);
14f6dad1
RH
3235 diff = tcg_temp_new_i64();
3236
3237 if (a->rw) {
3238 /* WHILERW */
3239 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3240 t1 = tcg_temp_new_i64();
3241 tcg_gen_sub_i64(diff, op0, op1);
3242 tcg_gen_sub_i64(t1, op1, op0);
3243 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3244 tcg_temp_free_i64(t1);
3245 /* Round down to a multiple of ESIZE. */
3246 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3247 /* If op1 == op0, diff == 0, and the condition is always true. */
3248 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3249 } else {
3250 /* WHILEWR */
3251 tcg_gen_sub_i64(diff, op1, op0);
3252 /* Round down to a multiple of ESIZE. */
3253 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3254 /* If op0 >= op1, diff <= 0, the condition is always true. */
3255 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3256 }
3257
3258 /* Bound to the maximum. */
3259 tcg_gen_umin_i64(diff, diff, tmax);
14f6dad1
RH
3260
3261 /* Since we're bounded, pass as a 32-bit type. */
3262 t2 = tcg_temp_new_i32();
3263 tcg_gen_extrl_i64_i32(t2, diff);
3264 tcg_temp_free_i64(diff);
3265
3266 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3267 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
14f6dad1
RH
3268
3269 ptr = tcg_temp_new_ptr();
3270 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3271
4481bbf2 3272 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
14f6dad1
RH
3273 do_pred_flags(t2);
3274
3275 tcg_temp_free_ptr(ptr);
3276 tcg_temp_free_i32(t2);
14f6dad1
RH
3277 return true;
3278}
3279
ed491961
RH
3280/*
3281 *** SVE Integer Wide Immediate - Unpredicated Group
3282 */
3283
3a7be554 3284static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3285{
3286 if (a->esz == 0) {
3287 return false;
3288 }
3289 if (sve_access_check(s)) {
3290 unsigned vsz = vec_full_reg_size(s);
3291 int dofs = vec_full_reg_offset(s, a->rd);
3292 uint64_t imm;
3293
3294 /* Decode the VFP immediate. */
3295 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3296 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3297 }
3298 return true;
3299}
3300
3a7be554 3301static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3302{
ed491961
RH
3303 if (sve_access_check(s)) {
3304 unsigned vsz = vec_full_reg_size(s);
3305 int dofs = vec_full_reg_offset(s, a->rd);
8711e71f 3306 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3307 }
3308 return true;
3309}
3310
/* ADD (vector, immediate). */
TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)
6e6a157d 3312
/* SUB (vector, immediate) is implemented as ADD of the negated immediate. */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3318
/*
 * SUBR (vector, immediate): reversed subtract, Zd = imm - Zn per element.
 * Expanded inline via GVecGen2s with the immediate as the scalar-first
 * operand; falls back to the out-of-line sve_subri_* helpers.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}
3358
/* MUL (vector, immediate). */
TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
6e6a157d 3360
3a7be554 3361static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3362{
6e6a157d 3363 if (sve_access_check(s)) {
138a1f7b
RH
3364 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3365 tcg_constant_i64(a->imm), u, d);
6e6a157d
RH
3366 }
3367 return true;
3368}
3369
/* Saturating add/sub (immediate): (signed|unsigned) x (add|sub). */
TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
6e6a157d
RH
3374
3375static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3376{
3377 if (sve_access_check(s)) {
3378 unsigned vsz = vec_full_reg_size(s);
6e6a157d
RH
3379 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3380 vec_full_reg_offset(s, a->rn),
138a1f7b 3381 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
6e6a157d
RH
3382 }
3383 return true;
3384}
3385
/* Expand SMAX/UMAX/SMIN/UMIN (immediate) per element size via helpers. */
#define DO_ZZI(NAME, name) \
    static gen_helper_gvec_2i * const name##i_fns[4] = { \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
    }; \
    TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3399
/* SDOT/UDOT (vectors): helper indexed by [unsigned][element size]. */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
d730ecaa 3406
/*
 * SVE Multiply - Indexed
 */

/* Dot products with an indexed vector element. */
TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

/* Mixed-sign dot products require the I8MM extension. */
TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)

/* Two-operand indexed multiply; the element index rides in the desc. */
#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX
3442
/*
 * Widening indexed multiply, bottom/top halves; TOP is packed into
 * bit 0 of the desc alongside the element index.
 */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
3463
/* Three-operand (accumulating) indexed multiply, SVE2 only. */
#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
3484
/*
 * Widening accumulating indexed multiply, bottom/top halves;
 * TOP is packed into bit 0 of the desc alongside the element index.
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
3520
/*
 * Complex accumulating indexed multiply; the 2-bit rotation is packed
 * into the low bits of the desc alongside the element index.
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
3535
/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */

/* FMLA/FMLS (indexed); @sub selects the subtracting form via desc bit 0. */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[4] = {
        NULL,                          /* MO_8 is invalid for FP */
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };
    return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sub,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
0a82d963 3555
/*
 *** SVE Floating Point Multiply Indexed Group
 */

static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
    NULL, gen_helper_gvec_fmul_idx_h,
    gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
           fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
ca40a6e6 3567
23fbe79f
RH
3568/*
3569 *** SVE Floating Point Fast Reduction Group
3570 */
3571
3572typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3573 TCGv_ptr, TCGv_i32);
3574
3575static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3576 gen_helper_fp_reduce *fn)
3577{
3578 unsigned vsz = vec_full_reg_size(s);
3579 unsigned p2vsz = pow2ceil(vsz);
c6a59b55 3580 TCGv_i32 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
23fbe79f
RH
3581 TCGv_ptr t_zn, t_pg, status;
3582 TCGv_i64 temp;
3583
3584 temp = tcg_temp_new_i64();
3585 t_zn = tcg_temp_new_ptr();
3586 t_pg = tcg_temp_new_ptr();
3587
3588 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3589 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3590 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
23fbe79f
RH
3591
3592 fn(temp, t_zn, t_pg, status, t_desc);
3593 tcg_temp_free_ptr(t_zn);
3594 tcg_temp_free_ptr(t_pg);
3595 tcg_temp_free_ptr(status);
23fbe79f
RH
3596
3597 write_fp_dreg(s, a->rd, temp);
3598 tcg_temp_free_i64(temp);
3599}
3600
/* Fast FP reductions (FADDV etc.); MO_8 is invalid for FP element sizes. */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
{ \
    static gen_helper_fp_reduce * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_reduce(s, a, fns[a->esz - 1]); \
    } \
    return true; \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
3623
3887c038
RH
3624/*
3625 *** SVE Floating Point Unary Operations - Unpredicated Group
3626 */
3627
3628static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3629{
3630 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3631 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
3632
3633 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3634 vec_full_reg_offset(s, a->rn),
3635 status, vsz, vsz, 0, fn);
3636 tcg_temp_free_ptr(status);
3637}
3638
3a7be554 3639static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3640{
3641 static gen_helper_gvec_2_ptr * const fns[3] = {
3642 gen_helper_gvec_frecpe_h,
3643 gen_helper_gvec_frecpe_s,
3644 gen_helper_gvec_frecpe_d,
3645 };
3646 if (a->esz == 0) {
3647 return false;
3648 }
3649 if (sve_access_check(s)) {
3650 do_zz_fp(s, a, fns[a->esz - 1]);
3651 }
3652 return true;
3653}
3654
3a7be554 3655static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3656{
3657 static gen_helper_gvec_2_ptr * const fns[3] = {
3658 gen_helper_gvec_frsqrte_h,
3659 gen_helper_gvec_frsqrte_s,
3660 gen_helper_gvec_frsqrte_d,
3661 };
3662 if (a->esz == 0) {
3663 return false;
3664 }
3665 if (sve_access_check(s)) {
3666 do_zz_fp(s, a, fns[a->esz - 1]);
3667 }
3668 return true;
3669}
3670
4d2e2a03
RH
3671/*
3672 *** SVE Floating Point Compare with Zero Group
3673 */
3674
3675static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3676 gen_helper_gvec_3_ptr *fn)
3677{
3678 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3679 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
3680
3681 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3682 vec_full_reg_offset(s, a->rn),
3683 pred_full_reg_offset(s, a->pg),
3684 status, vsz, vsz, 0, fn);
3685 tcg_temp_free_ptr(status);
3686}
3687
/* FP compares with zero; MO_8 is invalid for FP element sizes. */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
{ \
    static gen_helper_gvec_3_ptr * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_ppz_fp(s, a, fns[a->esz - 1]); \
    } \
    return true; \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3713
/*
 *** SVE floating-point trig multiply-add coefficient
 */

static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
    NULL, gen_helper_sve_ftmad_h,
    gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
};
TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
           ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
67fcd9ad 3725
/*
 *** SVE Floating Point Accumulating Reduction Group
 */

/*
 * FADDA: strictly-ordered FP add reduction of Zm under Pg, with the
 * scalar accumulator seeded from (and written back to) Vd/Vn.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    /* MO_8 is invalid for FP element sizes. */
    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Load the initial accumulator from element 0 of Zn. */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
3769
/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */

/* Unpredicated FP three-operand ops; fns[MO_8] is NULL (invalid). */
#define DO_FP3(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
        NULL, gen_helper_gvec_##name##_h, \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
    }; \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
3789
ec3b87c2
RH
3790/*
3791 *** SVE Floating Point Arithmetic - Predicated Group
3792 */
3793
3794static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3795 gen_helper_gvec_4_ptr *fn)
3796{
3797 if (fn == NULL) {
3798 return false;
3799 }
3800 if (sve_access_check(s)) {
3801 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3802 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
3803 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3804 vec_full_reg_offset(s, a->rn),
3805 vec_full_reg_offset(s, a->rm),
3806 pred_full_reg_offset(s, a->pg),
3807 status, vsz, vsz, 0, fn);
3808 tcg_temp_free_ptr(status);
3809 }
3810 return true;
3811}
3812
/* Predicated FP three-operand ops; fns[MO_8] is NULL (invalid). */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_zpzz_fp(s, a, fns[a->esz]); \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
8092c6a3 3836
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/*
 * Expand a predicated FP operation between Zn and a 64-bit scalar,
 * writing the result to Zd via the given two-operand-plus-scalar helper.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
3863
/* As do_fp_scalar, with the scalar taken from a decoded immediate. */
static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
                      gen_helper_sve_fp2scalar *fn)
{
    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                 tcg_constant_i64(imm), fn);
}
3870
/*
 * FP arithmetic with a one-bit immediate selecting between two
 * per-format constants (e.g. FADD #0.5 or #1.0).
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
{ \
    static gen_helper_sve_fp2scalar * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d \
    }; \
    static uint64_t const val[3][2] = { \
        { float16_##const0, float16_##const1 }, \
        { float32_##const0, float32_##const1 }, \
        { float64_##const0, float64_##const1 }, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
    } \
    return true; \
}

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
3903
abfdefd5
RH
3904static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3905 gen_helper_gvec_4_ptr *fn)
3906{
3907 if (fn == NULL) {
3908 return false;
3909 }
3910 if (sve_access_check(s)) {
3911 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3912 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
3913 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3914 vec_full_reg_offset(s, a->rn),
3915 vec_full_reg_offset(s, a->rm),
3916 pred_full_reg_offset(s, a->pg),
3917 status, vsz, vsz, 0, fn);
3918 tcg_temp_free_ptr(status);
3919 }
3920 return true;
3921}
3922
/* Predicated FP compares; fns[MO_8] is NULL (invalid). */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_fp_cmp(s, a, fns[a->esz]); \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
3942
3a7be554 3943static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
3944{
3945 static gen_helper_gvec_4_ptr * const fns[3] = {
3946 gen_helper_sve_fcadd_h,
3947 gen_helper_sve_fcadd_s,
3948 gen_helper_sve_fcadd_d
3949 };
3950
3951 if (a->esz == 0) {
3952 return false;
3953 }
3954 if (sve_access_check(s)) {
3955 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3956 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
3957 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3958 vec_full_reg_offset(s, a->rn),
3959 vec_full_reg_offset(s, a->rm),
3960 pred_full_reg_offset(s, a->pg),
3961 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3962 tcg_temp_free_ptr(status);
3963 }
3964 return true;
3965}
3966
/* Expand a predicated FP multiply-add with accumulator register Za. */
static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
                    gen_helper_gvec_5_ptr *fn)
{
    /* MO_8 is invalid for FP element sizes. */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

/* FMLA/FMLS/FNMLA/FNMLS (predicated); fns[MO_8] is NULL. */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
{ \
    static gen_helper_gvec_5_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_fmla(s, a, fns[a->esz]); \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4003
3a7be554 4004static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 4005{
08975da9
RH
4006 static gen_helper_gvec_5_ptr * const fns[4] = {
4007 NULL,
05f48bab
RH
4008 gen_helper_sve_fcmla_zpzzz_h,
4009 gen_helper_sve_fcmla_zpzzz_s,
4010 gen_helper_sve_fcmla_zpzzz_d,
4011 };
4012
4013 if (a->esz == 0) {
4014 return false;
4015 }
4016 if (sve_access_check(s)) {
4017 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4018 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
4019 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4020 vec_full_reg_offset(s, a->rn),
4021 vec_full_reg_offset(s, a->rm),
4022 vec_full_reg_offset(s, a->ra),
4023 pred_full_reg_offset(s, a->pg),
4024 status, vsz, vsz, a->rot, fns[a->esz]);
4025 tcg_temp_free_ptr(status);
05f48bab
RH
4026 }
4027 return true;
4028}
4029
3a7be554 4030static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405 4031{
41bf9b67
RH
4032 static gen_helper_gvec_4_ptr * const fns[4] = {
4033 NULL,
18fc2405
RH
4034 gen_helper_gvec_fcmlah_idx,
4035 gen_helper_gvec_fcmlas_idx,
41bf9b67 4036 NULL,
18fc2405
RH
4037 };
4038
18fc2405 4039 tcg_debug_assert(a->rd == a->ra);
41bf9b67
RH
4040
4041 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
4042 a->index * 4 + a->rot,
4043 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
18fc2405
RH
4044}
4045
8092c6a3
RH
4046/*
4047 *** SVE Floating Point Unary Operations Predicated Group
4048 */
4049
4050static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4051 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4052{
4053 if (sve_access_check(s)) {
4054 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 4055 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
4056 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4057 vec_full_reg_offset(s, rn),
4058 pred_full_reg_offset(s, pg),
4059 status, vsz, vsz, 0, fn);
4060 tcg_temp_free_ptr(status);
4061 }
4062 return true;
4063}
4064
3a7be554 4065static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4066{
e4ab5124 4067 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
4068}
4069
3a7be554 4070static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4071{
4072 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4073}
4074
d29b17ca
RH
4075static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
4076{
4077 if (!dc_isar_feature(aa64_sve_bf16, s)) {
4078 return false;
4079 }
4080 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
4081}
4082
3a7be554 4083static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4084{
e4ab5124 4085 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
4086}
4087
3a7be554 4088static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4089{
4090 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4091}
4092
3a7be554 4093static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4094{
4095 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4096}
4097
3a7be554 4098static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4099{
4100 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4101}
4102
3a7be554 4103static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4104{
4105 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4106}
4107
3a7be554 4108static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4109{
4110 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4111}
4112
3a7be554 4113static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4114{
4115 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4116}
4117
3a7be554 4118static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4119{
4120 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4121}
4122
3a7be554 4123static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4124{
4125 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4126}
4127
3a7be554 4128static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4129{
4130 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4131}
4132
3a7be554 4133static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4134{
4135 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4136}
4137
3a7be554 4138static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4139{
4140 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4141}
4142
3a7be554 4143static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4144{
4145 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4146}
4147
3a7be554 4148static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4149{
4150 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4151}
4152
3a7be554 4153static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4154{
4155 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4156}
4157
3a7be554 4158static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4159{
4160 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4161}
4162
3a7be554 4163static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4164{
4165 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4166}
4167
3a7be554 4168static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4169{
4170 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4171}
4172
cda3c753
RH
4173static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4174 gen_helper_sve_frint_h,
4175 gen_helper_sve_frint_s,
4176 gen_helper_sve_frint_d
4177};
4178
3a7be554 4179static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4180{
4181 if (a->esz == 0) {
4182 return false;
4183 }
4184 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4185 frint_fns[a->esz - 1]);
4186}
4187
3a7be554 4188static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4189{
4190 static gen_helper_gvec_3_ptr * const fns[3] = {
4191 gen_helper_sve_frintx_h,
4192 gen_helper_sve_frintx_s,
4193 gen_helper_sve_frintx_d
4194 };
4195 if (a->esz == 0) {
4196 return false;
4197 }
4198 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4199}
4200
95365277
SL
4201static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4202 int mode, gen_helper_gvec_3_ptr *fn)
cda3c753 4203{
cda3c753
RH
4204 if (sve_access_check(s)) {
4205 unsigned vsz = vec_full_reg_size(s);
4206 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4207 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4208
4209 gen_helper_set_rmode(tmode, tmode, status);
4210
4211 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4212 vec_full_reg_offset(s, a->rn),
4213 pred_full_reg_offset(s, a->pg),
95365277 4214 status, vsz, vsz, 0, fn);
cda3c753
RH
4215
4216 gen_helper_set_rmode(tmode, tmode, status);
4217 tcg_temp_free_i32(tmode);
4218 tcg_temp_free_ptr(status);
4219 }
4220 return true;
4221}
4222
3a7be554 4223static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753 4224{
95365277
SL
4225 if (a->esz == 0) {
4226 return false;
4227 }
4228 return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
cda3c753
RH
4229}
4230
3a7be554 4231static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753 4232{
95365277
SL
4233 if (a->esz == 0) {
4234 return false;
4235 }
4236 return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
cda3c753
RH
4237}
4238
3a7be554 4239static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753 4240{
95365277
SL
4241 if (a->esz == 0) {
4242 return false;
4243 }
4244 return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
cda3c753
RH
4245}
4246
3a7be554 4247static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753 4248{
95365277
SL
4249 if (a->esz == 0) {
4250 return false;
4251 }
4252 return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
cda3c753
RH
4253}
4254
3a7be554 4255static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753 4256{
95365277
SL
4257 if (a->esz == 0) {
4258 return false;
4259 }
4260 return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
cda3c753
RH
4261}
4262
3a7be554 4263static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4264{
4265 static gen_helper_gvec_3_ptr * const fns[3] = {
4266 gen_helper_sve_frecpx_h,
4267 gen_helper_sve_frecpx_s,
4268 gen_helper_sve_frecpx_d
4269 };
4270 if (a->esz == 0) {
4271 return false;
4272 }
4273 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4274}
4275
3a7be554 4276static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4277{
4278 static gen_helper_gvec_3_ptr * const fns[3] = {
4279 gen_helper_sve_fsqrt_h,
4280 gen_helper_sve_fsqrt_s,
4281 gen_helper_sve_fsqrt_d
4282 };
4283 if (a->esz == 0) {
4284 return false;
4285 }
4286 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4287}
4288
3a7be554 4289static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4290{
4291 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4292}
4293
3a7be554 4294static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4295{
4296 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4297}
4298
3a7be554 4299static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4300{
4301 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4302}
4303
3a7be554 4304static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4305{
4306 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4307}
4308
3a7be554 4309static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4310{
4311 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4312}
4313
3a7be554 4314static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4315{
4316 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4317}
4318
3a7be554 4319static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4320{
4321 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4322}
4323
3a7be554 4324static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4325{
4326 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4327}
4328
3a7be554 4329static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4330{
4331 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4332}
4333
3a7be554 4334static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4335{
4336 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4337}
4338
3a7be554 4339static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4340{
4341 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4342}
4343
3a7be554 4344static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4345{
4346 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4347}
4348
3a7be554 4349static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4350{
4351 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4352}
4353
3a7be554 4354static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4355{
4356 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4357}
4358
d1822297
RH
4359/*
4360 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4361 */
4362
4363/* Subroutine loading a vector register at VOFS of LEN bytes.
4364 * The load should begin at the address Rn + IMM.
4365 */
4366
19f2acc9 4367static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4368{
19f2acc9
RH
4369 int len_align = QEMU_ALIGN_DOWN(len, 8);
4370 int len_remain = len % 8;
4371 int nparts = len / 8 + ctpop8(len_remain);
d1822297 4372 int midx = get_mem_index(s);
b2aa8879 4373 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4374
b2aa8879
RH
4375 dirty_addr = tcg_temp_new_i64();
4376 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4377 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
b2aa8879 4378 tcg_temp_free_i64(dirty_addr);
d1822297 4379
b2aa8879
RH
4380 /*
4381 * Note that unpredicated load/store of vector/predicate registers
d1822297 4382 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4383 * operations on larger quantities.
d1822297
RH
4384 * Attempt to keep code expansion to a minimum by limiting the
4385 * amount of unrolling done.
4386 */
4387 if (nparts <= 4) {
4388 int i;
4389
b2aa8879 4390 t0 = tcg_temp_new_i64();
d1822297 4391 for (i = 0; i < len_align; i += 8) {
fc313c64 4392 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
d1822297 4393 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 4394 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4395 }
b2aa8879 4396 tcg_temp_free_i64(t0);
d1822297
RH
4397 } else {
4398 TCGLabel *loop = gen_new_label();
4399 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4400
b2aa8879
RH
4401 /* Copy the clean address into a local temp, live across the loop. */
4402 t0 = clean_addr;
4b4dc975 4403 clean_addr = new_tmp_a64_local(s);
b2aa8879 4404 tcg_gen_mov_i64(clean_addr, t0);
d1822297 4405
b2aa8879 4406 gen_set_label(loop);
d1822297 4407
b2aa8879 4408 t0 = tcg_temp_new_i64();
fc313c64 4409 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
b2aa8879 4410 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4411
b2aa8879 4412 tp = tcg_temp_new_ptr();
d1822297
RH
4413 tcg_gen_add_ptr(tp, cpu_env, i);
4414 tcg_gen_addi_ptr(i, i, 8);
4415 tcg_gen_st_i64(t0, tp, vofs);
4416 tcg_temp_free_ptr(tp);
b2aa8879 4417 tcg_temp_free_i64(t0);
d1822297
RH
4418
4419 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4420 tcg_temp_free_ptr(i);
4421 }
4422
b2aa8879
RH
4423 /*
4424 * Predicate register loads can be any multiple of 2.
d1822297
RH
4425 * Note that we still store the entire 64-bit unit into cpu_env.
4426 */
4427 if (len_remain) {
b2aa8879 4428 t0 = tcg_temp_new_i64();
d1822297
RH
4429 switch (len_remain) {
4430 case 2:
4431 case 4:
4432 case 8:
b2aa8879
RH
4433 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4434 MO_LE | ctz32(len_remain));
d1822297
RH
4435 break;
4436
4437 case 6:
4438 t1 = tcg_temp_new_i64();
b2aa8879
RH
4439 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4440 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4441 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
4442 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4443 tcg_temp_free_i64(t1);
4444 break;
4445
4446 default:
4447 g_assert_not_reached();
4448 }
4449 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 4450 tcg_temp_free_i64(t0);
d1822297 4451 }
d1822297
RH
4452}
4453
5047c204 4454/* Similarly for stores. */
19f2acc9 4455static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 4456{
19f2acc9
RH
4457 int len_align = QEMU_ALIGN_DOWN(len, 8);
4458 int len_remain = len % 8;
4459 int nparts = len / 8 + ctpop8(len_remain);
5047c204 4460 int midx = get_mem_index(s);
bba87d0a 4461 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 4462
bba87d0a
RH
4463 dirty_addr = tcg_temp_new_i64();
4464 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4465 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
bba87d0a 4466 tcg_temp_free_i64(dirty_addr);
5047c204
RH
4467
4468 /* Note that unpredicated load/store of vector/predicate registers
4469 * are defined as a stream of bytes, which equates to little-endian
4470 * operations on larger quantities. There is no nice way to force
4471 * a little-endian store for aarch64_be-linux-user out of line.
4472 *
4473 * Attempt to keep code expansion to a minimum by limiting the
4474 * amount of unrolling done.
4475 */
4476 if (nparts <= 4) {
4477 int i;
4478
bba87d0a 4479 t0 = tcg_temp_new_i64();
5047c204
RH
4480 for (i = 0; i < len_align; i += 8) {
4481 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
fc313c64 4482 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
d8227b09 4483 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 4484 }
bba87d0a 4485 tcg_temp_free_i64(t0);
5047c204
RH
4486 } else {
4487 TCGLabel *loop = gen_new_label();
bba87d0a 4488 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 4489
bba87d0a
RH
4490 /* Copy the clean address into a local temp, live across the loop. */
4491 t0 = clean_addr;
4b4dc975 4492 clean_addr = new_tmp_a64_local(s);
bba87d0a 4493 tcg_gen_mov_i64(clean_addr, t0);
5047c204 4494
bba87d0a 4495 gen_set_label(loop);
5047c204 4496
bba87d0a
RH
4497 t0 = tcg_temp_new_i64();
4498 tp = tcg_temp_new_ptr();
4499 tcg_gen_add_ptr(tp, cpu_env, i);
4500 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 4501 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
4502 tcg_temp_free_ptr(tp);
4503
fc313c64 4504 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
bba87d0a
RH
4505 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4506 tcg_temp_free_i64(t0);
5047c204
RH
4507
4508 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4509 tcg_temp_free_ptr(i);
4510 }
4511
4512 /* Predicate register stores can be any multiple of 2. */
4513 if (len_remain) {
bba87d0a 4514 t0 = tcg_temp_new_i64();
5047c204 4515 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
4516
4517 switch (len_remain) {
4518 case 2:
4519 case 4:
4520 case 8:
bba87d0a
RH
4521 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4522 MO_LE | ctz32(len_remain));
5047c204
RH
4523 break;
4524
4525 case 6:
bba87d0a
RH
4526 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4527 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 4528 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 4529 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
4530 break;
4531
4532 default:
4533 g_assert_not_reached();
4534 }
bba87d0a 4535 tcg_temp_free_i64(t0);
5047c204 4536 }
5047c204
RH
4537}
4538
3a7be554 4539static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4540{
4541 if (sve_access_check(s)) {
4542 int size = vec_full_reg_size(s);
4543 int off = vec_full_reg_offset(s, a->rd);
4544 do_ldr(s, off, size, a->rn, a->imm * size);
4545 }
4546 return true;
4547}
4548
3a7be554 4549static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4550{
4551 if (sve_access_check(s)) {
4552 int size = pred_full_reg_size(s);
4553 int off = pred_full_reg_offset(s, a->rd);
4554 do_ldr(s, off, size, a->rn, a->imm * size);
4555 }
4556 return true;
4557}
c4e7c493 4558
3a7be554 4559static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4560{
4561 if (sve_access_check(s)) {
4562 int size = vec_full_reg_size(s);
4563 int off = vec_full_reg_offset(s, a->rd);
4564 do_str(s, off, size, a->rn, a->imm * size);
4565 }
4566 return true;
4567}
4568
3a7be554 4569static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4570{
4571 if (sve_access_check(s)) {
4572 int size = pred_full_reg_size(s);
4573 int off = pred_full_reg_offset(s, a->rd);
4574 do_str(s, off, size, a->rn, a->imm * size);
4575 }
4576 return true;
4577}
4578
c4e7c493
RH
4579/*
4580 *** SVE Memory - Contiguous Load Group
4581 */
4582
4583/* The memory mode of the dtype. */
14776ab5 4584static const MemOp dtype_mop[16] = {
c4e7c493
RH
4585 MO_UB, MO_UB, MO_UB, MO_UB,
4586 MO_SL, MO_UW, MO_UW, MO_UW,
4587 MO_SW, MO_SW, MO_UL, MO_UL,
fc313c64 4588 MO_SB, MO_SB, MO_SB, MO_UQ
c4e7c493
RH
4589};
4590
4591#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4592
4593/* The vector element size of dtype. */
4594static const uint8_t dtype_esz[16] = {
4595 0, 1, 2, 3,
4596 3, 1, 2, 3,
4597 3, 2, 2, 3,
4598 3, 2, 1, 3
4599};
4600
4601static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
4602 int dtype, uint32_t mte_n, bool is_write,
4603 gen_helper_gvec_mem *fn)
c4e7c493
RH
4604{
4605 unsigned vsz = vec_full_reg_size(s);
4606 TCGv_ptr t_pg;
206adacf 4607 int desc = 0;
c4e7c493 4608
206adacf
RH
4609 /*
4610 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
4611 * registers as pointers, so encode the regno into the data field.
4612 * For consistency, do this even for LD1.
4613 */
9473d0ec 4614 if (s->mte_active[0]) {
206adacf
RH
4615 int msz = dtype_msz(dtype);
4616
4617 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4618 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4619 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4620 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 4621 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
206adacf 4622 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
4623 } else {
4624 addr = clean_data_tbi(s, addr);
206adacf 4625 }
9473d0ec 4626
206adacf 4627 desc = simd_desc(vsz, vsz, zt | desc);
c4e7c493
RH
4628 t_pg = tcg_temp_new_ptr();
4629
4630 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
c6a59b55 4631 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));
c4e7c493
RH
4632
4633 tcg_temp_free_ptr(t_pg);
c4e7c493
RH
4634}
4635
c182c6db
RH
4636/* Indexed by [mte][be][dtype][nreg] */
4637static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
4638 { /* mte inactive, little-endian */
4639 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4640 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4641 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4642 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4643 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4644
4645 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4646 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4647 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4648 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4649 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4650
4651 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4652 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4653 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4654 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4655 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4656
4657 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4658 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4659 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4660 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4661 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4662
4663 /* mte inactive, big-endian */
4664 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4665 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4666 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4667 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4668 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4669
4670 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4671 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4672 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4673 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4674 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4675
4676 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4677 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4678 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4679 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4680 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4681
4682 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4683 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4684 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4685 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4686 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4687
4688 { /* mte active, little-endian */
4689 { { gen_helper_sve_ld1bb_r_mte,
4690 gen_helper_sve_ld2bb_r_mte,
4691 gen_helper_sve_ld3bb_r_mte,
4692 gen_helper_sve_ld4bb_r_mte },
4693 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4694 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4695 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4696
4697 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4698 { gen_helper_sve_ld1hh_le_r_mte,
4699 gen_helper_sve_ld2hh_le_r_mte,
4700 gen_helper_sve_ld3hh_le_r_mte,
4701 gen_helper_sve_ld4hh_le_r_mte },
4702 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4703 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4704
4705 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4706 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4707 { gen_helper_sve_ld1ss_le_r_mte,
4708 gen_helper_sve_ld2ss_le_r_mte,
4709 gen_helper_sve_ld3ss_le_r_mte,
4710 gen_helper_sve_ld4ss_le_r_mte },
4711 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4712
4713 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4714 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4715 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4716 { gen_helper_sve_ld1dd_le_r_mte,
4717 gen_helper_sve_ld2dd_le_r_mte,
4718 gen_helper_sve_ld3dd_le_r_mte,
4719 gen_helper_sve_ld4dd_le_r_mte } },
4720
4721 /* mte active, big-endian */
4722 { { gen_helper_sve_ld1bb_r_mte,
4723 gen_helper_sve_ld2bb_r_mte,
4724 gen_helper_sve_ld3bb_r_mte,
4725 gen_helper_sve_ld4bb_r_mte },
4726 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4727 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4728 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4729
4730 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4731 { gen_helper_sve_ld1hh_be_r_mte,
4732 gen_helper_sve_ld2hh_be_r_mte,
4733 gen_helper_sve_ld3hh_be_r_mte,
4734 gen_helper_sve_ld4hh_be_r_mte },
4735 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4736 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4737
4738 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4739 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4740 { gen_helper_sve_ld1ss_be_r_mte,
4741 gen_helper_sve_ld2ss_be_r_mte,
4742 gen_helper_sve_ld3ss_be_r_mte,
4743 gen_helper_sve_ld4ss_be_r_mte },
4744 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4745
4746 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4747 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4748 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4749 { gen_helper_sve_ld1dd_be_r_mte,
4750 gen_helper_sve_ld2dd_be_r_mte,
4751 gen_helper_sve_ld3dd_be_r_mte,
4752 gen_helper_sve_ld4dd_be_r_mte } } },
4753};
4754
c4e7c493
RH
4755static void do_ld_zpa(DisasContext *s, int zt, int pg,
4756 TCGv_i64 addr, int dtype, int nreg)
4757{
206adacf 4758 gen_helper_gvec_mem *fn
c182c6db 4759 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 4760
206adacf
RH
4761 /*
4762 * While there are holes in the table, they are not
c4e7c493
RH
4763 * accessible via the instruction encoding.
4764 */
4765 assert(fn != NULL);
206adacf 4766 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
4767}
4768
3a7be554 4769static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
4770{
4771 if (a->rm == 31) {
4772 return false;
4773 }
4774 if (sve_access_check(s)) {
4775 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4776 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4777 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4778 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4779 }
4780 return true;
4781}
4782
3a7be554 4783static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
4784{
4785 if (sve_access_check(s)) {
4786 int vsz = vec_full_reg_size(s);
4787 int elements = vsz >> dtype_esz[a->dtype];
4788 TCGv_i64 addr = new_tmp_a64(s);
4789
4790 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4791 (a->imm * elements * (a->nreg + 1))
4792 << dtype_msz(a->dtype));
4793 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4794 }
4795 return true;
4796}
e2654d75 4797
3a7be554 4798static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 4799{
aa13f7c3
RH
4800 static gen_helper_gvec_mem * const fns[2][2][16] = {
4801 { /* mte inactive, little-endian */
4802 { gen_helper_sve_ldff1bb_r,
4803 gen_helper_sve_ldff1bhu_r,
4804 gen_helper_sve_ldff1bsu_r,
4805 gen_helper_sve_ldff1bdu_r,
4806
4807 gen_helper_sve_ldff1sds_le_r,
4808 gen_helper_sve_ldff1hh_le_r,
4809 gen_helper_sve_ldff1hsu_le_r,
4810 gen_helper_sve_ldff1hdu_le_r,
4811
4812 gen_helper_sve_ldff1hds_le_r,
4813 gen_helper_sve_ldff1hss_le_r,
4814 gen_helper_sve_ldff1ss_le_r,
4815 gen_helper_sve_ldff1sdu_le_r,
4816
4817 gen_helper_sve_ldff1bds_r,
4818 gen_helper_sve_ldff1bss_r,
4819 gen_helper_sve_ldff1bhs_r,
4820 gen_helper_sve_ldff1dd_le_r },
4821
4822 /* mte inactive, big-endian */
4823 { gen_helper_sve_ldff1bb_r,
4824 gen_helper_sve_ldff1bhu_r,
4825 gen_helper_sve_ldff1bsu_r,
4826 gen_helper_sve_ldff1bdu_r,
4827
4828 gen_helper_sve_ldff1sds_be_r,
4829 gen_helper_sve_ldff1hh_be_r,
4830 gen_helper_sve_ldff1hsu_be_r,
4831 gen_helper_sve_ldff1hdu_be_r,
4832
4833 gen_helper_sve_ldff1hds_be_r,
4834 gen_helper_sve_ldff1hss_be_r,
4835 gen_helper_sve_ldff1ss_be_r,
4836 gen_helper_sve_ldff1sdu_be_r,
4837
4838 gen_helper_sve_ldff1bds_r,
4839 gen_helper_sve_ldff1bss_r,
4840 gen_helper_sve_ldff1bhs_r,
4841 gen_helper_sve_ldff1dd_be_r } },
4842
4843 { /* mte active, little-endian */
4844 { gen_helper_sve_ldff1bb_r_mte,
4845 gen_helper_sve_ldff1bhu_r_mte,
4846 gen_helper_sve_ldff1bsu_r_mte,
4847 gen_helper_sve_ldff1bdu_r_mte,
4848
4849 gen_helper_sve_ldff1sds_le_r_mte,
4850 gen_helper_sve_ldff1hh_le_r_mte,
4851 gen_helper_sve_ldff1hsu_le_r_mte,
4852 gen_helper_sve_ldff1hdu_le_r_mte,
4853
4854 gen_helper_sve_ldff1hds_le_r_mte,
4855 gen_helper_sve_ldff1hss_le_r_mte,
4856 gen_helper_sve_ldff1ss_le_r_mte,
4857 gen_helper_sve_ldff1sdu_le_r_mte,
4858
4859 gen_helper_sve_ldff1bds_r_mte,
4860 gen_helper_sve_ldff1bss_r_mte,
4861 gen_helper_sve_ldff1bhs_r_mte,
4862 gen_helper_sve_ldff1dd_le_r_mte },
4863
4864 /* mte active, big-endian */
4865 { gen_helper_sve_ldff1bb_r_mte,
4866 gen_helper_sve_ldff1bhu_r_mte,
4867 gen_helper_sve_ldff1bsu_r_mte,
4868 gen_helper_sve_ldff1bdu_r_mte,
4869
4870 gen_helper_sve_ldff1sds_be_r_mte,
4871 gen_helper_sve_ldff1hh_be_r_mte,
4872 gen_helper_sve_ldff1hsu_be_r_mte,
4873 gen_helper_sve_ldff1hdu_be_r_mte,
4874
4875 gen_helper_sve_ldff1hds_be_r_mte,
4876 gen_helper_sve_ldff1hss_be_r_mte,
4877 gen_helper_sve_ldff1ss_be_r_mte,
4878 gen_helper_sve_ldff1sdu_be_r_mte,
4879
4880 gen_helper_sve_ldff1bds_r_mte,
4881 gen_helper_sve_ldff1bss_r_mte,
4882 gen_helper_sve_ldff1bhs_r_mte,
4883 gen_helper_sve_ldff1dd_be_r_mte } },
e2654d75
RH
4884 };
4885
4886 if (sve_access_check(s)) {
4887 TCGv_i64 addr = new_tmp_a64(s);
4888 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4889 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
aa13f7c3
RH
4890 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4891 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4892 }
4893 return true;
4894}
4895
3a7be554 4896static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 4897{
aa13f7c3
RH
4898 static gen_helper_gvec_mem * const fns[2][2][16] = {
4899 { /* mte inactive, little-endian */
4900 { gen_helper_sve_ldnf1bb_r,
4901 gen_helper_sve_ldnf1bhu_r,
4902 gen_helper_sve_ldnf1bsu_r,
4903 gen_helper_sve_ldnf1bdu_r,
4904
4905 gen_helper_sve_ldnf1sds_le_r,
4906 gen_helper_sve_ldnf1hh_le_r,
4907 gen_helper_sve_ldnf1hsu_le_r,
4908 gen_helper_sve_ldnf1hdu_le_r,
4909
4910 gen_helper_sve_ldnf1hds_le_r,
4911 gen_helper_sve_ldnf1hss_le_r,
4912 gen_helper_sve_ldnf1ss_le_r,
4913 gen_helper_sve_ldnf1sdu_le_r,
4914
4915 gen_helper_sve_ldnf1bds_r,
4916 gen_helper_sve_ldnf1bss_r,
4917 gen_helper_sve_ldnf1bhs_r,
4918 gen_helper_sve_ldnf1dd_le_r },
4919
4920 /* mte inactive, big-endian */
4921 { gen_helper_sve_ldnf1bb_r,
4922 gen_helper_sve_ldnf1bhu_r,
4923 gen_helper_sve_ldnf1bsu_r,
4924 gen_helper_sve_ldnf1bdu_r,
4925
4926 gen_helper_sve_ldnf1sds_be_r,
4927 gen_helper_sve_ldnf1hh_be_r,
4928 gen_helper_sve_ldnf1hsu_be_r,
4929 gen_helper_sve_ldnf1hdu_be_r,
4930
4931 gen_helper_sve_ldnf1hds_be_r,
4932 gen_helper_sve_ldnf1hss_be_r,
4933 gen_helper_sve_ldnf1ss_be_r,
4934 gen_helper_sve_ldnf1sdu_be_r,
4935
4936 gen_helper_sve_ldnf1bds_r,
4937 gen_helper_sve_ldnf1bss_r,
4938 gen_helper_sve_ldnf1bhs_r,
4939 gen_helper_sve_ldnf1dd_be_r } },
4940
4941 { /* mte inactive, little-endian */
4942 { gen_helper_sve_ldnf1bb_r_mte,
4943 gen_helper_sve_ldnf1bhu_r_mte,
4944 gen_helper_sve_ldnf1bsu_r_mte,
4945 gen_helper_sve_ldnf1bdu_r_mte,
4946
4947 gen_helper_sve_ldnf1sds_le_r_mte,
4948 gen_helper_sve_ldnf1hh_le_r_mte,
4949 gen_helper_sve_ldnf1hsu_le_r_mte,
4950 gen_helper_sve_ldnf1hdu_le_r_mte,
4951
4952 gen_helper_sve_ldnf1hds_le_r_mte,
4953 gen_helper_sve_ldnf1hss_le_r_mte,
4954 gen_helper_sve_ldnf1ss_le_r_mte,
4955 gen_helper_sve_ldnf1sdu_le_r_mte,
4956
4957 gen_helper_sve_ldnf1bds_r_mte,
4958 gen_helper_sve_ldnf1bss_r_mte,
4959 gen_helper_sve_ldnf1bhs_r_mte,
4960 gen_helper_sve_ldnf1dd_le_r_mte },
4961
4962 /* mte inactive, big-endian */
4963 { gen_helper_sve_ldnf1bb_r_mte,
4964 gen_helper_sve_ldnf1bhu_r_mte,
4965 gen_helper_sve_ldnf1bsu_r_mte,
4966 gen_helper_sve_ldnf1bdu_r_mte,
4967
4968 gen_helper_sve_ldnf1sds_be_r_mte,
4969 gen_helper_sve_ldnf1hh_be_r_mte,
4970 gen_helper_sve_ldnf1hsu_be_r_mte,
4971 gen_helper_sve_ldnf1hdu_be_r_mte,
4972
4973 gen_helper_sve_ldnf1hds_be_r_mte,
4974 gen_helper_sve_ldnf1hss_be_r_mte,
4975 gen_helper_sve_ldnf1ss_be_r_mte,
4976 gen_helper_sve_ldnf1sdu_be_r_mte,
4977
4978 gen_helper_sve_ldnf1bds_r_mte,
4979 gen_helper_sve_ldnf1bss_r_mte,
4980 gen_helper_sve_ldnf1bhs_r_mte,
4981 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
4982 };
4983
4984 if (sve_access_check(s)) {
4985 int vsz = vec_full_reg_size(s);
4986 int elements = vsz >> dtype_esz[a->dtype];
4987 int off = (a->imm * elements) << dtype_msz(a->dtype);
4988 TCGv_i64 addr = new_tmp_a64(s);
4989
4990 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
4991 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4992 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4993 }
4994 return true;
4995}
1a039c7e 4996
c182c6db 4997static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
05abe304 4998{
05abe304
RH
4999 unsigned vsz = vec_full_reg_size(s);
5000 TCGv_ptr t_pg;
7924d239 5001 int poff;
05abe304
RH
5002
5003 /* Load the first quadword using the normal predicated load helpers. */
2a99ab2b
RH
5004 poff = pred_full_reg_offset(s, pg);
5005 if (vsz > 16) {
5006 /*
5007 * Zero-extend the first 16 bits of the predicate into a temporary.
5008 * This avoids triggering an assert making sure we don't have bits
5009 * set within a predicate beyond VQ, but we have lowered VQ to 1
5010 * for this load operation.
5011 */
5012 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5013#if HOST_BIG_ENDIAN
2a99ab2b
RH
5014 poff += 6;
5015#endif
5016 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5017
5018 poff = offsetof(CPUARMState, vfp.preg_tmp);
5019 tcg_gen_st_i64(tmp, cpu_env, poff);
5020 tcg_temp_free_i64(tmp);
5021 }
5022
05abe304 5023 t_pg = tcg_temp_new_ptr();
2a99ab2b 5024 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 5025
c182c6db
RH
5026 gen_helper_gvec_mem *fn
5027 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
7924d239 5028 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
05abe304
RH
5029
5030 tcg_temp_free_ptr(t_pg);
05abe304
RH
5031
5032 /* Replicate that first quadword. */
5033 if (vsz > 16) {
7924d239
RH
5034 int doff = vec_full_reg_offset(s, zt);
5035 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
05abe304
RH
5036 }
5037}
5038
3a7be554 5039static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5040{
5041 if (a->rm == 31) {
5042 return false;
5043 }
5044 if (sve_access_check(s)) {
5045 int msz = dtype_msz(a->dtype);
5046 TCGv_i64 addr = new_tmp_a64(s);
5047 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5048 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
c182c6db 5049 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5050 }
5051 return true;
5052}
5053
3a7be554 5054static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5055{
5056 if (sve_access_check(s)) {
5057 TCGv_i64 addr = new_tmp_a64(s);
5058 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
c182c6db 5059 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
05abe304
RH
5060 }
5061 return true;
5062}
5063
12c563f6
RH
5064static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
5065{
5066 unsigned vsz = vec_full_reg_size(s);
5067 unsigned vsz_r32;
5068 TCGv_ptr t_pg;
5069 int poff, doff;
5070
5071 if (vsz < 32) {
5072 /*
5073 * Note that this UNDEFINED check comes after CheckSVEEnabled()
5074 * in the ARM pseudocode, which is the sve_access_check() done
5075 * in our caller. We should not now return false from the caller.
5076 */
5077 unallocated_encoding(s);
5078 return;
5079 }
5080
5081 /* Load the first octaword using the normal predicated load helpers. */
5082
5083 poff = pred_full_reg_offset(s, pg);
5084 if (vsz > 32) {
5085 /*
5086 * Zero-extend the first 32 bits of the predicate into a temporary.
5087 * This avoids triggering an assert making sure we don't have bits
5088 * set within a predicate beyond VQ, but we have lowered VQ to 2
5089 * for this load operation.
5090 */
5091 TCGv_i64 tmp = tcg_temp_new_i64();
e03b5686 5092#if HOST_BIG_ENDIAN
12c563f6
RH
5093 poff += 4;
5094#endif
5095 tcg_gen_ld32u_i64(tmp, cpu_env, poff);
5096
5097 poff = offsetof(CPUARMState, vfp.preg_tmp);
5098 tcg_gen_st_i64(tmp, cpu_env, poff);
5099 tcg_temp_free_i64(tmp);
5100 }
5101
5102 t_pg = tcg_temp_new_ptr();
5103 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
5104
5105 gen_helper_gvec_mem *fn
5106 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
5107 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
5108
5109 tcg_temp_free_ptr(t_pg);
5110
5111 /*
5112 * Replicate that first octaword.
5113 * The replication happens in units of 32; if the full vector size
5114 * is not a multiple of 32, the final bits are zeroed.
5115 */
5116 doff = vec_full_reg_offset(s, zt);
5117 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
5118 if (vsz >= 64) {
5119 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
5120 }
5121 vsz -= vsz_r32;
5122 if (vsz) {
5123 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
5124 }
5125}
5126
5127static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5128{
5129 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5130 return false;
5131 }
5132 if (a->rm == 31) {
5133 return false;
5134 }
5135 if (sve_access_check(s)) {
5136 TCGv_i64 addr = new_tmp_a64(s);
5137 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5138 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5139 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5140 }
5141 return true;
5142}
5143
5144static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5145{
5146 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5147 return false;
5148 }
5149 if (sve_access_check(s)) {
5150 TCGv_i64 addr = new_tmp_a64(s);
5151 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5152 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5153 }
5154 return true;
5155}
5156
68459864 5157/* Load and broadcast element. */
3a7be554 5158static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 5159{
68459864
RH
5160 unsigned vsz = vec_full_reg_size(s);
5161 unsigned psz = pred_full_reg_size(s);
5162 unsigned esz = dtype_esz[a->dtype];
d0e372b0 5163 unsigned msz = dtype_msz(a->dtype);
c0ed9166 5164 TCGLabel *over;
4ac430e1 5165 TCGv_i64 temp, clean_addr;
68459864 5166
c0ed9166
RH
5167 if (!sve_access_check(s)) {
5168 return true;
5169 }
5170
5171 over = gen_new_label();
5172
68459864
RH
5173 /* If the guarding predicate has no bits set, no load occurs. */
5174 if (psz <= 8) {
5175 /* Reduce the pred_esz_masks value simply to reduce the
5176 * size of the code generated here.
5177 */
5178 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5179 temp = tcg_temp_new_i64();
5180 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5181 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5182 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5183 tcg_temp_free_i64(temp);
5184 } else {
5185 TCGv_i32 t32 = tcg_temp_new_i32();
5186 find_last_active(s, t32, esz, a->pg);
5187 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5188 tcg_temp_free_i32(t32);
5189 }
5190
5191 /* Load the data. */
5192 temp = tcg_temp_new_i64();
d0e372b0 5193 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5194 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5195
5196 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
0ca0f872 5197 finalize_memop(s, dtype_mop[a->dtype]));
68459864
RH
5198
5199 /* Broadcast to *all* elements. */
5200 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5201 vsz, vsz, temp);
5202 tcg_temp_free_i64(temp);
5203
5204 /* Zero the inactive elements. */
5205 gen_set_label(over);
60245996 5206 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5207}
5208
1a039c7e
RH
5209static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5210 int msz, int esz, int nreg)
5211{
71b9f394
RH
5212 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5213 { { { gen_helper_sve_st1bb_r,
5214 gen_helper_sve_st1bh_r,
5215 gen_helper_sve_st1bs_r,
5216 gen_helper_sve_st1bd_r },
5217 { NULL,
5218 gen_helper_sve_st1hh_le_r,
5219 gen_helper_sve_st1hs_le_r,
5220 gen_helper_sve_st1hd_le_r },
5221 { NULL, NULL,
5222 gen_helper_sve_st1ss_le_r,
5223 gen_helper_sve_st1sd_le_r },
5224 { NULL, NULL, NULL,
5225 gen_helper_sve_st1dd_le_r } },
5226 { { gen_helper_sve_st1bb_r,
5227 gen_helper_sve_st1bh_r,
5228 gen_helper_sve_st1bs_r,
5229 gen_helper_sve_st1bd_r },
5230 { NULL,
5231 gen_helper_sve_st1hh_be_r,
5232 gen_helper_sve_st1hs_be_r,
5233 gen_helper_sve_st1hd_be_r },
5234 { NULL, NULL,
5235 gen_helper_sve_st1ss_be_r,
5236 gen_helper_sve_st1sd_be_r },
5237 { NULL, NULL, NULL,
5238 gen_helper_sve_st1dd_be_r } } },
5239
5240 { { { gen_helper_sve_st1bb_r_mte,
5241 gen_helper_sve_st1bh_r_mte,
5242 gen_helper_sve_st1bs_r_mte,
5243 gen_helper_sve_st1bd_r_mte },
5244 { NULL,
5245 gen_helper_sve_st1hh_le_r_mte,
5246 gen_helper_sve_st1hs_le_r_mte,
5247 gen_helper_sve_st1hd_le_r_mte },
5248 { NULL, NULL,
5249 gen_helper_sve_st1ss_le_r_mte,
5250 gen_helper_sve_st1sd_le_r_mte },
5251 { NULL, NULL, NULL,
5252 gen_helper_sve_st1dd_le_r_mte } },
5253 { { gen_helper_sve_st1bb_r_mte,
5254 gen_helper_sve_st1bh_r_mte,
5255 gen_helper_sve_st1bs_r_mte,
5256 gen_helper_sve_st1bd_r_mte },
5257 { NULL,
5258 gen_helper_sve_st1hh_be_r_mte,
5259 gen_helper_sve_st1hs_be_r_mte,
5260 gen_helper_sve_st1hd_be_r_mte },
5261 { NULL, NULL,
5262 gen_helper_sve_st1ss_be_r_mte,
5263 gen_helper_sve_st1sd_be_r_mte },
5264 { NULL, NULL, NULL,
5265 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5266 };
71b9f394
RH
5267 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5268 { { { gen_helper_sve_st2bb_r,
5269 gen_helper_sve_st2hh_le_r,
5270 gen_helper_sve_st2ss_le_r,
5271 gen_helper_sve_st2dd_le_r },
5272 { gen_helper_sve_st3bb_r,
5273 gen_helper_sve_st3hh_le_r,
5274 gen_helper_sve_st3ss_le_r,
5275 gen_helper_sve_st3dd_le_r },
5276 { gen_helper_sve_st4bb_r,
5277 gen_helper_sve_st4hh_le_r,
5278 gen_helper_sve_st4ss_le_r,
5279 gen_helper_sve_st4dd_le_r } },
5280 { { gen_helper_sve_st2bb_r,
5281 gen_helper_sve_st2hh_be_r,
5282 gen_helper_sve_st2ss_be_r,
5283 gen_helper_sve_st2dd_be_r },
5284 { gen_helper_sve_st3bb_r,
5285 gen_helper_sve_st3hh_be_r,
5286 gen_helper_sve_st3ss_be_r,
5287 gen_helper_sve_st3dd_be_r },
5288 { gen_helper_sve_st4bb_r,
5289 gen_helper_sve_st4hh_be_r,
5290 gen_helper_sve_st4ss_be_r,
5291 gen_helper_sve_st4dd_be_r } } },
5292 { { { gen_helper_sve_st2bb_r_mte,
5293 gen_helper_sve_st2hh_le_r_mte,
5294 gen_helper_sve_st2ss_le_r_mte,
5295 gen_helper_sve_st2dd_le_r_mte },
5296 { gen_helper_sve_st3bb_r_mte,
5297 gen_helper_sve_st3hh_le_r_mte,
5298 gen_helper_sve_st3ss_le_r_mte,
5299 gen_helper_sve_st3dd_le_r_mte },
5300 { gen_helper_sve_st4bb_r_mte,
5301 gen_helper_sve_st4hh_le_r_mte,
5302 gen_helper_sve_st4ss_le_r_mte,
5303 gen_helper_sve_st4dd_le_r_mte } },
5304 { { gen_helper_sve_st2bb_r_mte,
5305 gen_helper_sve_st2hh_be_r_mte,
5306 gen_helper_sve_st2ss_be_r_mte,
5307 gen_helper_sve_st2dd_be_r_mte },
5308 { gen_helper_sve_st3bb_r_mte,
5309 gen_helper_sve_st3hh_be_r_mte,
5310 gen_helper_sve_st3ss_be_r_mte,
5311 gen_helper_sve_st3dd_be_r_mte },
5312 { gen_helper_sve_st4bb_r_mte,
5313 gen_helper_sve_st4hh_be_r_mte,
5314 gen_helper_sve_st4ss_be_r_mte,
5315 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5316 };
5317 gen_helper_gvec_mem *fn;
28d57f2d 5318 int be = s->be_data == MO_BE;
1a039c7e
RH
5319
5320 if (nreg == 0) {
5321 /* ST1 */
71b9f394
RH
5322 fn = fn_single[s->mte_active[0]][be][msz][esz];
5323 nreg = 1;
1a039c7e
RH
5324 } else {
5325 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5326 assert(msz == esz);
71b9f394 5327 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5328 }
5329 assert(fn != NULL);
71b9f394 5330 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5331}
5332
3a7be554 5333static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5334{
5335 if (a->rm == 31 || a->msz > a->esz) {
5336 return false;
5337 }
5338 if (sve_access_check(s)) {
5339 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5340 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5341 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5342 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5343 }
5344 return true;
5345}
5346
3a7be554 5347static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5348{
5349 if (a->msz > a->esz) {
5350 return false;
5351 }
5352 if (sve_access_check(s)) {
5353 int vsz = vec_full_reg_size(s);
5354 int elements = vsz >> a->esz;
5355 TCGv_i64 addr = new_tmp_a64(s);
5356
5357 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5358 (a->imm * elements * (a->nreg + 1)) << a->msz);
5359 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5360 }
5361 return true;
5362}
f6dbf62a
RH
5363
5364/*
5365 *** SVE gather loads / scatter stores
5366 */
5367
500d0484 5368static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5369 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5370 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5371{
5372 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5373 TCGv_ptr t_zm = tcg_temp_new_ptr();
5374 TCGv_ptr t_pg = tcg_temp_new_ptr();
5375 TCGv_ptr t_zt = tcg_temp_new_ptr();
d28d12f0 5376 int desc = 0;
500d0484 5377
d28d12f0
RH
5378 if (s->mte_active[0]) {
5379 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5380 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5381 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5382 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5383 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
5384 desc <<= SVE_MTEDESC_SHIFT;
5385 }
cdecb3fc 5386 desc = simd_desc(vsz, vsz, desc | scale);
f6dbf62a
RH
5387
5388 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5389 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5390 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
c6a59b55 5391 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
f6dbf62a
RH
5392
5393 tcg_temp_free_ptr(t_zt);
5394 tcg_temp_free_ptr(t_zm);
5395 tcg_temp_free_ptr(t_pg);
f6dbf62a
RH
5396}
5397
d28d12f0
RH
5398/* Indexed by [mte][be][ff][xs][u][msz]. */
5399static gen_helper_gvec_mem_scatter * const
5400gather_load_fn32[2][2][2][2][2][3] = {
5401 { /* MTE Inactive */
5402 { /* Little-endian */
5403 { { { gen_helper_sve_ldbss_zsu,
5404 gen_helper_sve_ldhss_le_zsu,
5405 NULL, },
5406 { gen_helper_sve_ldbsu_zsu,
5407 gen_helper_sve_ldhsu_le_zsu,
5408 gen_helper_sve_ldss_le_zsu, } },
5409 { { gen_helper_sve_ldbss_zss,
5410 gen_helper_sve_ldhss_le_zss,
5411 NULL, },
5412 { gen_helper_sve_ldbsu_zss,
5413 gen_helper_sve_ldhsu_le_zss,
5414 gen_helper_sve_ldss_le_zss, } } },
5415
5416 /* First-fault */
5417 { { { gen_helper_sve_ldffbss_zsu,
5418 gen_helper_sve_ldffhss_le_zsu,
5419 NULL, },
5420 { gen_helper_sve_ldffbsu_zsu,
5421 gen_helper_sve_ldffhsu_le_zsu,
5422 gen_helper_sve_ldffss_le_zsu, } },
5423 { { gen_helper_sve_ldffbss_zss,
5424 gen_helper_sve_ldffhss_le_zss,
5425 NULL, },
5426 { gen_helper_sve_ldffbsu_zss,
5427 gen_helper_sve_ldffhsu_le_zss,
5428 gen_helper_sve_ldffss_le_zss, } } } },
5429
5430 { /* Big-endian */
5431 { { { gen_helper_sve_ldbss_zsu,
5432 gen_helper_sve_ldhss_be_zsu,
5433 NULL, },
5434 { gen_helper_sve_ldbsu_zsu,
5435 gen_helper_sve_ldhsu_be_zsu,
5436 gen_helper_sve_ldss_be_zsu, } },
5437 { { gen_helper_sve_ldbss_zss,
5438 gen_helper_sve_ldhss_be_zss,
5439 NULL, },
5440 { gen_helper_sve_ldbsu_zss,
5441 gen_helper_sve_ldhsu_be_zss,
5442 gen_helper_sve_ldss_be_zss, } } },
5443
5444 /* First-fault */
5445 { { { gen_helper_sve_ldffbss_zsu,
5446 gen_helper_sve_ldffhss_be_zsu,
5447 NULL, },
5448 { gen_helper_sve_ldffbsu_zsu,
5449 gen_helper_sve_ldffhsu_be_zsu,
5450 gen_helper_sve_ldffss_be_zsu, } },
5451 { { gen_helper_sve_ldffbss_zss,
5452 gen_helper_sve_ldffhss_be_zss,
5453 NULL, },
5454 { gen_helper_sve_ldffbsu_zss,
5455 gen_helper_sve_ldffhsu_be_zss,
5456 gen_helper_sve_ldffss_be_zss, } } } } },
5457 { /* MTE Active */
5458 { /* Little-endian */
5459 { { { gen_helper_sve_ldbss_zsu_mte,
5460 gen_helper_sve_ldhss_le_zsu_mte,
5461 NULL, },
5462 { gen_helper_sve_ldbsu_zsu_mte,
5463 gen_helper_sve_ldhsu_le_zsu_mte,
5464 gen_helper_sve_ldss_le_zsu_mte, } },
5465 { { gen_helper_sve_ldbss_zss_mte,
5466 gen_helper_sve_ldhss_le_zss_mte,
5467 NULL, },
5468 { gen_helper_sve_ldbsu_zss_mte,
5469 gen_helper_sve_ldhsu_le_zss_mte,
5470 gen_helper_sve_ldss_le_zss_mte, } } },
5471
5472 /* First-fault */
5473 { { { gen_helper_sve_ldffbss_zsu_mte,
5474 gen_helper_sve_ldffhss_le_zsu_mte,
5475 NULL, },
5476 { gen_helper_sve_ldffbsu_zsu_mte,
5477 gen_helper_sve_ldffhsu_le_zsu_mte,
5478 gen_helper_sve_ldffss_le_zsu_mte, } },
5479 { { gen_helper_sve_ldffbss_zss_mte,
5480 gen_helper_sve_ldffhss_le_zss_mte,
5481 NULL, },
5482 { gen_helper_sve_ldffbsu_zss_mte,
5483 gen_helper_sve_ldffhsu_le_zss_mte,
5484 gen_helper_sve_ldffss_le_zss_mte, } } } },
5485
5486 { /* Big-endian */
5487 { { { gen_helper_sve_ldbss_zsu_mte,
5488 gen_helper_sve_ldhss_be_zsu_mte,
5489 NULL, },
5490 { gen_helper_sve_ldbsu_zsu_mte,
5491 gen_helper_sve_ldhsu_be_zsu_mte,
5492 gen_helper_sve_ldss_be_zsu_mte, } },
5493 { { gen_helper_sve_ldbss_zss_mte,
5494 gen_helper_sve_ldhss_be_zss_mte,
5495 NULL, },
5496 { gen_helper_sve_ldbsu_zss_mte,
5497 gen_helper_sve_ldhsu_be_zss_mte,
5498 gen_helper_sve_ldss_be_zss_mte, } } },
5499
5500 /* First-fault */
5501 { { { gen_helper_sve_ldffbss_zsu_mte,
5502 gen_helper_sve_ldffhss_be_zsu_mte,
5503 NULL, },
5504 { gen_helper_sve_ldffbsu_zsu_mte,
5505 gen_helper_sve_ldffhsu_be_zsu_mte,
5506 gen_helper_sve_ldffss_be_zsu_mte, } },
5507 { { gen_helper_sve_ldffbss_zss_mte,
5508 gen_helper_sve_ldffhss_be_zss_mte,
5509 NULL, },
5510 { gen_helper_sve_ldffbsu_zss_mte,
5511 gen_helper_sve_ldffhsu_be_zss_mte,
5512 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5513};
5514
5515/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5516static gen_helper_gvec_mem_scatter * const
5517gather_load_fn64[2][2][2][3][2][4] = {
5518 { /* MTE Inactive */
5519 { /* Little-endian */
5520 { { { gen_helper_sve_ldbds_zsu,
5521 gen_helper_sve_ldhds_le_zsu,
5522 gen_helper_sve_ldsds_le_zsu,
5523 NULL, },
5524 { gen_helper_sve_ldbdu_zsu,
5525 gen_helper_sve_ldhdu_le_zsu,
5526 gen_helper_sve_ldsdu_le_zsu,
5527 gen_helper_sve_lddd_le_zsu, } },
5528 { { gen_helper_sve_ldbds_zss,
5529 gen_helper_sve_ldhds_le_zss,
5530 gen_helper_sve_ldsds_le_zss,
5531 NULL, },
5532 { gen_helper_sve_ldbdu_zss,
5533 gen_helper_sve_ldhdu_le_zss,
5534 gen_helper_sve_ldsdu_le_zss,
5535 gen_helper_sve_lddd_le_zss, } },
5536 { { gen_helper_sve_ldbds_zd,
5537 gen_helper_sve_ldhds_le_zd,
5538 gen_helper_sve_ldsds_le_zd,
5539 NULL, },
5540 { gen_helper_sve_ldbdu_zd,
5541 gen_helper_sve_ldhdu_le_zd,
5542 gen_helper_sve_ldsdu_le_zd,
5543 gen_helper_sve_lddd_le_zd, } } },
5544
5545 /* First-fault */
5546 { { { gen_helper_sve_ldffbds_zsu,
5547 gen_helper_sve_ldffhds_le_zsu,
5548 gen_helper_sve_ldffsds_le_zsu,
5549 NULL, },
5550 { gen_helper_sve_ldffbdu_zsu,
5551 gen_helper_sve_ldffhdu_le_zsu,
5552 gen_helper_sve_ldffsdu_le_zsu,
5553 gen_helper_sve_ldffdd_le_zsu, } },
5554 { { gen_helper_sve_ldffbds_zss,
5555 gen_helper_sve_ldffhds_le_zss,
5556 gen_helper_sve_ldffsds_le_zss,
5557 NULL, },
5558 { gen_helper_sve_ldffbdu_zss,
5559 gen_helper_sve_ldffhdu_le_zss,
5560 gen_helper_sve_ldffsdu_le_zss,
5561 gen_helper_sve_ldffdd_le_zss, } },
5562 { { gen_helper_sve_ldffbds_zd,
5563 gen_helper_sve_ldffhds_le_zd,
5564 gen_helper_sve_ldffsds_le_zd,
5565 NULL, },
5566 { gen_helper_sve_ldffbdu_zd,
5567 gen_helper_sve_ldffhdu_le_zd,
5568 gen_helper_sve_ldffsdu_le_zd,
5569 gen_helper_sve_ldffdd_le_zd, } } } },
5570 { /* Big-endian */
5571 { { { gen_helper_sve_ldbds_zsu,
5572 gen_helper_sve_ldhds_be_zsu,
5573 gen_helper_sve_ldsds_be_zsu,
5574 NULL, },
5575 { gen_helper_sve_ldbdu_zsu,
5576 gen_helper_sve_ldhdu_be_zsu,
5577 gen_helper_sve_ldsdu_be_zsu,
5578 gen_helper_sve_lddd_be_zsu, } },
5579 { { gen_helper_sve_ldbds_zss,
5580 gen_helper_sve_ldhds_be_zss,
5581 gen_helper_sve_ldsds_be_zss,
5582 NULL, },
5583 { gen_helper_sve_ldbdu_zss,
5584 gen_helper_sve_ldhdu_be_zss,
5585 gen_helper_sve_ldsdu_be_zss,
5586 gen_helper_sve_lddd_be_zss, } },
5587 { { gen_helper_sve_ldbds_zd,
5588 gen_helper_sve_ldhds_be_zd,
5589 gen_helper_sve_ldsds_be_zd,
5590 NULL, },
5591 { gen_helper_sve_ldbdu_zd,
5592 gen_helper_sve_ldhdu_be_zd,
5593 gen_helper_sve_ldsdu_be_zd,
5594 gen_helper_sve_lddd_be_zd, } } },
5595
5596 /* First-fault */
5597 { { { gen_helper_sve_ldffbds_zsu,
5598 gen_helper_sve_ldffhds_be_zsu,
5599 gen_helper_sve_ldffsds_be_zsu,
5600 NULL, },
5601 { gen_helper_sve_ldffbdu_zsu,
5602 gen_helper_sve_ldffhdu_be_zsu,
5603 gen_helper_sve_ldffsdu_be_zsu,
5604 gen_helper_sve_ldffdd_be_zsu, } },
5605 { { gen_helper_sve_ldffbds_zss,
5606 gen_helper_sve_ldffhds_be_zss,
5607 gen_helper_sve_ldffsds_be_zss,
5608 NULL, },
5609 { gen_helper_sve_ldffbdu_zss,
5610 gen_helper_sve_ldffhdu_be_zss,
5611 gen_helper_sve_ldffsdu_be_zss,
5612 gen_helper_sve_ldffdd_be_zss, } },
5613 { { gen_helper_sve_ldffbds_zd,
5614 gen_helper_sve_ldffhds_be_zd,
5615 gen_helper_sve_ldffsds_be_zd,
5616 NULL, },
5617 { gen_helper_sve_ldffbdu_zd,
5618 gen_helper_sve_ldffhdu_be_zd,
5619 gen_helper_sve_ldffsdu_be_zd,
5620 gen_helper_sve_ldffdd_be_zd, } } } } },
5621 { /* MTE Active */
5622 { /* Little-endian */
5623 { { { gen_helper_sve_ldbds_zsu_mte,
5624 gen_helper_sve_ldhds_le_zsu_mte,
5625 gen_helper_sve_ldsds_le_zsu_mte,
5626 NULL, },
5627 { gen_helper_sve_ldbdu_zsu_mte,
5628 gen_helper_sve_ldhdu_le_zsu_mte,
5629 gen_helper_sve_ldsdu_le_zsu_mte,
5630 gen_helper_sve_lddd_le_zsu_mte, } },
5631 { { gen_helper_sve_ldbds_zss_mte,
5632 gen_helper_sve_ldhds_le_zss_mte,
5633 gen_helper_sve_ldsds_le_zss_mte,
5634 NULL, },
5635 { gen_helper_sve_ldbdu_zss_mte,
5636 gen_helper_sve_ldhdu_le_zss_mte,
5637 gen_helper_sve_ldsdu_le_zss_mte,
5638 gen_helper_sve_lddd_le_zss_mte, } },
5639 { { gen_helper_sve_ldbds_zd_mte,
5640 gen_helper_sve_ldhds_le_zd_mte,
5641 gen_helper_sve_ldsds_le_zd_mte,
5642 NULL, },
5643 { gen_helper_sve_ldbdu_zd_mte,
5644 gen_helper_sve_ldhdu_le_zd_mte,
5645 gen_helper_sve_ldsdu_le_zd_mte,
5646 gen_helper_sve_lddd_le_zd_mte, } } },
5647
5648 /* First-fault */
5649 { { { gen_helper_sve_ldffbds_zsu_mte,
5650 gen_helper_sve_ldffhds_le_zsu_mte,
5651 gen_helper_sve_ldffsds_le_zsu_mte,
5652 NULL, },
5653 { gen_helper_sve_ldffbdu_zsu_mte,
5654 gen_helper_sve_ldffhdu_le_zsu_mte,
5655 gen_helper_sve_ldffsdu_le_zsu_mte,
5656 gen_helper_sve_ldffdd_le_zsu_mte, } },
5657 { { gen_helper_sve_ldffbds_zss_mte,
5658 gen_helper_sve_ldffhds_le_zss_mte,
5659 gen_helper_sve_ldffsds_le_zss_mte,
5660 NULL, },
5661 { gen_helper_sve_ldffbdu_zss_mte,
5662 gen_helper_sve_ldffhdu_le_zss_mte,
5663 gen_helper_sve_ldffsdu_le_zss_mte,
5664 gen_helper_sve_ldffdd_le_zss_mte, } },
5665 { { gen_helper_sve_ldffbds_zd_mte,
5666 gen_helper_sve_ldffhds_le_zd_mte,
5667 gen_helper_sve_ldffsds_le_zd_mte,
5668 NULL, },
5669 { gen_helper_sve_ldffbdu_zd_mte,
5670 gen_helper_sve_ldffhdu_le_zd_mte,
5671 gen_helper_sve_ldffsdu_le_zd_mte,
5672 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5673 { /* Big-endian */
5674 { { { gen_helper_sve_ldbds_zsu_mte,
5675 gen_helper_sve_ldhds_be_zsu_mte,
5676 gen_helper_sve_ldsds_be_zsu_mte,
5677 NULL, },
5678 { gen_helper_sve_ldbdu_zsu_mte,
5679 gen_helper_sve_ldhdu_be_zsu_mte,
5680 gen_helper_sve_ldsdu_be_zsu_mte,
5681 gen_helper_sve_lddd_be_zsu_mte, } },
5682 { { gen_helper_sve_ldbds_zss_mte,
5683 gen_helper_sve_ldhds_be_zss_mte,
5684 gen_helper_sve_ldsds_be_zss_mte,
5685 NULL, },
5686 { gen_helper_sve_ldbdu_zss_mte,
5687 gen_helper_sve_ldhdu_be_zss_mte,
5688 gen_helper_sve_ldsdu_be_zss_mte,
5689 gen_helper_sve_lddd_be_zss_mte, } },
5690 { { gen_helper_sve_ldbds_zd_mte,
5691 gen_helper_sve_ldhds_be_zd_mte,
5692 gen_helper_sve_ldsds_be_zd_mte,
5693 NULL, },
5694 { gen_helper_sve_ldbdu_zd_mte,
5695 gen_helper_sve_ldhdu_be_zd_mte,
5696 gen_helper_sve_ldsdu_be_zd_mte,
5697 gen_helper_sve_lddd_be_zd_mte, } } },
5698
5699 /* First-fault */
5700 { { { gen_helper_sve_ldffbds_zsu_mte,
5701 gen_helper_sve_ldffhds_be_zsu_mte,
5702 gen_helper_sve_ldffsds_be_zsu_mte,
5703 NULL, },
5704 { gen_helper_sve_ldffbdu_zsu_mte,
5705 gen_helper_sve_ldffhdu_be_zsu_mte,
5706 gen_helper_sve_ldffsdu_be_zsu_mte,
5707 gen_helper_sve_ldffdd_be_zsu_mte, } },
5708 { { gen_helper_sve_ldffbds_zss_mte,
5709 gen_helper_sve_ldffhds_be_zss_mte,
5710 gen_helper_sve_ldffsds_be_zss_mte,
5711 NULL, },
5712 { gen_helper_sve_ldffbdu_zss_mte,
5713 gen_helper_sve_ldffhdu_be_zss_mte,
5714 gen_helper_sve_ldffsdu_be_zss_mte,
5715 gen_helper_sve_ldffdd_be_zss_mte, } },
5716 { { gen_helper_sve_ldffbds_zd_mte,
5717 gen_helper_sve_ldffhds_be_zd_mte,
5718 gen_helper_sve_ldffsds_be_zd_mte,
5719 NULL, },
5720 { gen_helper_sve_ldffbdu_zd_mte,
5721 gen_helper_sve_ldffhdu_be_zd_mte,
5722 gen_helper_sve_ldffsdu_be_zd_mte,
5723 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
5724};
5725
3a7be554 5726static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5727{
5728 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5729 bool be = s->be_data == MO_BE;
5730 bool mte = s->mte_active[0];
673e9fa6
RH
5731
5732 if (!sve_access_check(s)) {
5733 return true;
5734 }
5735
5736 switch (a->esz) {
5737 case MO_32:
d28d12f0 5738 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5739 break;
5740 case MO_64:
d28d12f0 5741 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5742 break;
5743 }
5744 assert(fn != NULL);
5745
5746 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5747 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5748 return true;
5749}
5750
3a7be554 5751static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5752{
5753 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5754 bool be = s->be_data == MO_BE;
5755 bool mte = s->mte_active[0];
673e9fa6
RH
5756
5757 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5758 return false;
5759 }
5760 if (!sve_access_check(s)) {
5761 return true;
5762 }
5763
5764 switch (a->esz) {
5765 case MO_32:
d28d12f0 5766 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5767 break;
5768 case MO_64:
d28d12f0 5769 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5770 break;
5771 }
5772 assert(fn != NULL);
5773
5774 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5775 * by loading the immediate into the scalar parameter.
5776 */
2ccdf94f
RH
5777 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5778 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
673e9fa6
RH
5779 return true;
5780}
5781
cf327449
SL
5782static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
5783{
b17ab470
RH
5784 gen_helper_gvec_mem_scatter *fn = NULL;
5785 bool be = s->be_data == MO_BE;
5786 bool mte = s->mte_active[0];
5787
5788 if (a->esz < a->msz + !a->u) {
5789 return false;
5790 }
cf327449
SL
5791 if (!dc_isar_feature(aa64_sve2, s)) {
5792 return false;
5793 }
b17ab470
RH
5794 if (!sve_access_check(s)) {
5795 return true;
5796 }
5797
5798 switch (a->esz) {
5799 case MO_32:
5800 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
5801 break;
5802 case MO_64:
5803 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
5804 break;
5805 }
5806 assert(fn != NULL);
5807
5808 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5809 cpu_reg(s, a->rm), a->msz, false, fn);
5810 return true;
cf327449
SL
5811}
5812
d28d12f0
RH
5813/* Indexed by [mte][be][xs][msz]. */
5814static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5815 { /* MTE Inactive */
5816 { /* Little-endian */
5817 { gen_helper_sve_stbs_zsu,
5818 gen_helper_sve_sths_le_zsu,
5819 gen_helper_sve_stss_le_zsu, },
5820 { gen_helper_sve_stbs_zss,
5821 gen_helper_sve_sths_le_zss,
5822 gen_helper_sve_stss_le_zss, } },
5823 { /* Big-endian */
5824 { gen_helper_sve_stbs_zsu,
5825 gen_helper_sve_sths_be_zsu,
5826 gen_helper_sve_stss_be_zsu, },
5827 { gen_helper_sve_stbs_zss,
5828 gen_helper_sve_sths_be_zss,
5829 gen_helper_sve_stss_be_zss, } } },
5830 { /* MTE Active */
5831 { /* Little-endian */
5832 { gen_helper_sve_stbs_zsu_mte,
5833 gen_helper_sve_sths_le_zsu_mte,
5834 gen_helper_sve_stss_le_zsu_mte, },
5835 { gen_helper_sve_stbs_zss_mte,
5836 gen_helper_sve_sths_le_zss_mte,
5837 gen_helper_sve_stss_le_zss_mte, } },
5838 { /* Big-endian */
5839 { gen_helper_sve_stbs_zsu_mte,
5840 gen_helper_sve_sths_be_zsu_mte,
5841 gen_helper_sve_stss_be_zsu_mte, },
5842 { gen_helper_sve_stbs_zss_mte,
5843 gen_helper_sve_sths_be_zss_mte,
5844 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
5845};
5846
/*
 * Scatter-store helpers for 64-bit elements.
 * Indexed by [mte][be][xs][msz] like scatter_store_fn32, with an extra
 * msz column for doubleword stores.
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
5904
3a7be554 5905static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 5906{
f6dbf62a 5907 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
5908 bool be = s->be_data == MO_BE;
5909 bool mte = s->mte_active[0];
f6dbf62a
RH
5910
5911 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5912 return false;
5913 }
5914 if (!sve_access_check(s)) {
5915 return true;
5916 }
5917 switch (a->esz) {
5918 case MO_32:
d28d12f0 5919 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
5920 break;
5921 case MO_64:
d28d12f0 5922 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
5923 break;
5924 default:
5925 g_assert_not_reached();
5926 }
5927 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5928 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
5929 return true;
5930}
dec6cf6b 5931
3a7be554 5932static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
5933{
5934 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5935 bool be = s->be_data == MO_BE;
5936 bool mte = s->mte_active[0];
408ecde9
RH
5937
5938 if (a->esz < a->msz) {
5939 return false;
5940 }
5941 if (!sve_access_check(s)) {
5942 return true;
5943 }
5944
5945 switch (a->esz) {
5946 case MO_32:
d28d12f0 5947 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
5948 break;
5949 case MO_64:
d28d12f0 5950 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
5951 break;
5952 }
5953 assert(fn != NULL);
5954
5955 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5956 * by loading the immediate into the scalar parameter.
5957 */
2ccdf94f
RH
5958 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5959 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
408ecde9
RH
5960 return true;
5961}
5962
6ebca45f
SL
5963static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
5964{
b17ab470
RH
5965 gen_helper_gvec_mem_scatter *fn;
5966 bool be = s->be_data == MO_BE;
5967 bool mte = s->mte_active[0];
5968
5969 if (a->esz < a->msz) {
5970 return false;
5971 }
6ebca45f
SL
5972 if (!dc_isar_feature(aa64_sve2, s)) {
5973 return false;
5974 }
b17ab470
RH
5975 if (!sve_access_check(s)) {
5976 return true;
5977 }
5978
5979 switch (a->esz) {
5980 case MO_32:
5981 fn = scatter_store_fn32[mte][be][0][a->msz];
5982 break;
5983 case MO_64:
5984 fn = scatter_store_fn64[mte][be][2][a->msz];
5985 break;
5986 default:
5987 g_assert_not_reached();
5988 }
5989
5990 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5991 cpu_reg(s, a->rm), a->msz, true, fn);
5992 return true;
6ebca45f
SL
5993}
5994
dec6cf6b
RH
5995/*
5996 * Prefetches
5997 */
5998
3a7be554 5999static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
6000{
6001 /* Prefetch is a nop within QEMU. */
2f95a3b0 6002 (void)sve_access_check(s);
dec6cf6b
RH
6003 return true;
6004}
6005
3a7be554 6006static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
6007{
6008 if (a->rm == 31) {
6009 return false;
6010 }
6011 /* Prefetch is a nop within QEMU. */
2f95a3b0 6012 (void)sve_access_check(s);
dec6cf6b
RH
6013 return true;
6014}
a2103582
RH
6015
/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

/* Unpredicated, predicated-merging, and predicated-zeroing forms. */
TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
5dad1ba5
RH
6033
/*
 * SVE2 Integer Multiply - Unpredicated
 */

TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)

/* Helper tables below are indexed by element size (b, h, s, d). */
static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

/* Polynomial multiply is defined for bytes only. */
TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)
169d7c58 6070
d4b1e59d
RH
/*
 * SVE2 Integer - Predicated
 */

/* NULL for bytes: the pairwise-widening accumulate has no byte form. */
static gen_helper_gvec_4 * const sadlp_fns[4] = {
    NULL, gen_helper_sve2_sadalp_zpzz_h,
    gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
};
TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           sadlp_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const uadlp_fns[4] = {
    NULL, gen_helper_sve2_uadalp_zpzz_h,
    gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
};
TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           uadlp_fns[a->esz], a, 0)

/*
 * SVE2 integer unary operations (predicated)
 */

/* URECPE and URSQRTE exist only for 32-bit elements (esz == 2). */
TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)

static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
45d9503d 6110
5880bdc0
RH
/*
 * SVE2 predicated two-operand integer operations, expanded via the
 * DO_ZPZZ helper macro: saturating/rounding shifts, halving add/sub,
 * pairwise operations, and saturating add/sub.
 */
DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
0ce1dda8
RH
6139
/*
 * SVE2 Widening Integer Arithmetic
 *
 * Helper tables are indexed by element size; NULL for bytes because
 * the destination element is twice the source width.  The trailing
 * data argument to gen_gvec_ool_arg_zzz appears to select bottom/top
 * input halves (0 = both bottom, 3 = both top, 2/1 = mixed, matching
 * the BT/TB suffixes) -- NOTE(review): confirm against the helpers.
 */

static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL, gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL, gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL, gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL, gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL, gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL, gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL, gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL, gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL, gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

/* Interleaving exclusive-or: defined for all element sizes. */
static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
2df3ca55 6237
e3a56131
RH
6238static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6239{
6240 static gen_helper_gvec_3 * const fns[4] = {
6241 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6242 NULL, gen_helper_sve2_pmull_d,
6243 };
6244 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
6245 return false;
6246 }
615f19fe 6247 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
e3a56131
RH
6248}
6249
615f19fe
RH
/* Bottom/top polynomial multiply long. */
TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)

/*
 * Widening add/subtract of the bottom (data 0) or top (data 1)
 * halves of the second operand; NULL for bytes.
 */
static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL, gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL, gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL, gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL, gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
4269fef1
RH
6280
/*
 * Signed shift-left-long (vector expansion): widen the bottom
 * (top == 0) or top (top == 1) half-elements of n with sign
 * extension, then shift left by shl.  The immediate packs both
 * controls: bit 0 = top, remaining bits = shl.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            /*
             * The high half is already in its final position, with
             * zeros below; just mask it in place.
             */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Sign-extend the top half down, then shift up by shl. */
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        /* Move the low half to the top, then sign-extend down. */
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}
6302
/*
 * Unsigned shift-left-long over a 64-bit lane: widen the bottom
 * (top == 0) or top (top == 1) half-elements of n with zero
 * extension and shift left by shl.  imm packs bit 0 = top,
 * remaining bits = shl.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    /* Result bits that can be non-zero after widening and shifting. */
    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    /*
     * A single shift positions the selected half; a negative amount
     * means a right shift (top half, shl < halfbits).
     */
    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}
6323
/* Size-specific wrappers matching the GVecGen2i .fni8 signature. */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
6338
/*
 * Unsigned shift-left-long (vector expansion): as gen_ushll_i64,
 * but using variable-width vector shifts plus a mask, since there
 * is no cross-half element shift at vector level.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            /* High half already in place; mask it. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Zero-extend the top half down, then shift up by shl. */
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            /* No shift: just zero-extend the low half. */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Move the low half to the top, then zero-extend down. */
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
6367
/*
 * Common expansion for SSHLLB/T and USHLLB/T.
 * sel selects top (true) or bottom (false) input halves; uns selects
 * the unsigned (true) or signed (false) widening.  The (shift, sel)
 * pair is packed into the gvec immediate as (imm << 1) | sel, which
 * gen_[su]shll_* unpack again.
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    /* esz here is the *destination* size; reject byte and invalid. */
    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}
6419
/* B/T and signed/unsigned variants of shift-left-long. */
static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}

static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}

static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}

static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
cb9c33b8 6439
615f19fe
RH
/* Bit permute operations: gated on the sve2_bitperm feature. */
static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bgrp_fns[a->esz], a, 0)

/* Complex add: data argument 0 selects rot90, 1 selects rot270. */
static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)
38650638 6478
eeb4e84d
RH
/* Widening absolute-difference accumulate; NULL for bytes. */
static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL, gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL, gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
b8295dfb
RH
6492
/*
 * Add/subtract with carry long.
 * sel selects the top (true) or bottom (false) input elements.
 */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}

TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
a7e3a90e 6508
f2be26a5
RH
/* Shift-and-accumulate / shift-insert, sharing the AdvSIMD expanders. */
TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)

/* Absolute-difference accumulate. */
TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
5ff2838d
RH
6518
/*
 * Common expansion for the saturating extract-narrow operations.
 * esz is the *destination* element size, so MO_64 is rejected, as is
 * any unexpected immediate (these encodings carry no shift).
 */
static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
                                   const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
        !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}
6534
/* Vector ops required by the signed saturating narrow expansions. */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTNB: clamp each wide element to the signed range of the
 * half-width element, then keep only the low half of each lane.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}
6555
static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    /* Per-size expansion table; vece is the wide (source) element. */
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6574
/*
 * SQXTNT: clamp as for SQXTNB, then deposit the result into the
 * high half of each destination lane, preserving the low half
 * (hence .load_dest below).  Clobbers n as scratch.
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* Select the low half from d and the high half from n. */
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6614
/* Vector ops required by the unsigned saturating narrow expansions. */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

/*
 * UQXTNB: clamp each wide element to the unsigned half-width maximum;
 * the umin also leaves the high half zero, so no explicit mask needed.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6648
/*
 * UQXTNT: unsigned clamp, then deposit into the high half of each
 * destination lane, preserving the low half.  Clobbers n as scratch.
 */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* t still holds the low-half mask: low from d, high from n. */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6683
/* Vector ops required by the signed-to-unsigned narrow expansions. */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTUNB: clamp each signed wide element to [0, 2^halfbits - 1];
 * the umin also clears the high half, so no explicit mask needed.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6719
/*
 * SQXTUNT: signed-to-unsigned clamp, then deposit into the high half
 * of each destination lane, preserving the low half.  Clobbers n.
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    /* t still holds the low-half mask: low from d, high from n. */
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
6756
/*
 * Common expansion for the shift-right-narrow operations.
 * esz is the *destination* element size; the decoder guarantees a
 * shift amount in [1, element width], asserted here.
 */
static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
                               const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
6772
/*
 * SHRNB over a 64-bit lane: shift each wide element right by shr and
 * keep the low half of each lane (the narrowed results in the even,
 * i.e. bottom, positions).
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

/* Size-specific wrappers matching the GVecGen2i .fni8 signature. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

/* SHRNB at vector level: same shift-and-mask, using a mask vector. */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
6808
static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
{
    /* Per-size expansion table; vece is the wide (source) element. */
    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnb16_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnb32_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnb64_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6831
/*
 * SHRNT on a 64-bit lane: insert the narrowed value into the top (odd)
 * halves of d, preserving d's bottom halves.  Shifting left by
 * (halfbits - shr) places source bits [shr, shr+halfbits) in the top
 * half of each wide element.  'n' is clobbered.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);      /* keep only the top halves of n */
    tcg_gen_andi_i64(d, d, mask);       /* keep only the bottom halves of d */
    tcg_gen_or_i64(d, d, n);
}
6842
/* Size-specialized wrappers matching the GVecGen2i .fni8 signature. */
static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/* For MO_64 there is one wide element per i64, so a plain deposit works. */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}
6858
/*
 * Vector expansion of SHRNT: position the shifted value in the top
 * halves, then merge with d under the low-half mask (bitsel keeps d's
 * bits where t is set, n's bits elsewhere).  'n' is clobbered.
 */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
6870
6871static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
6872{
6873 static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
6874 static const GVecGen2i ops[3] = {
6875 { .fni8 = gen_shrnt16_i64,
6876 .fniv = gen_shrnt_vec,
6877 .opt_opc = vec_list,
6878 .load_dest = true,
6879 .fno = gen_helper_sve2_shrnt_h,
6880 .vece = MO_16 },
6881 { .fni8 = gen_shrnt32_i64,
6882 .fniv = gen_shrnt_vec,
6883 .opt_opc = vec_list,
6884 .load_dest = true,
6885 .fno = gen_helper_sve2_shrnt_s,
6886 .vece = MO_32 },
6887 { .fni8 = gen_shrnt64_i64,
6888 .fniv = gen_shrnt_vec,
6889 .opt_opc = vec_list,
6890 .load_dest = true,
6891 .fno = gen_helper_sve2_shrnt_d,
6892 .vece = MO_64 },
6893 };
6894 return do_sve2_shr_narrow(s, a, ops);
6895}
6896
6897static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
6898{
6899 static const GVecGen2i ops[3] = {
6900 { .fno = gen_helper_sve2_rshrnb_h },
6901 { .fno = gen_helper_sve2_rshrnb_s },
6902 { .fno = gen_helper_sve2_rshrnb_d },
6903 };
6904 return do_sve2_shr_narrow(s, a, ops);
6905}
6906
6907static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
6908{
6909 static const GVecGen2i ops[3] = {
6910 { .fno = gen_helper_sve2_rshrnt_h },
6911 { .fno = gen_helper_sve2_rshrnt_s },
6912 { .fno = gen_helper_sve2_rshrnt_d },
6913 };
6914 return do_sve2_shr_narrow(s, a, ops);
81fd3e6e
RH
6915}
6916
/*
 * SQSHRUNB: signed arithmetic shift right, then clamp to the unsigned
 * range of the narrow element [0, 2^halfbits - 1].  The clamped value
 * fits in the low half, so the top halves of the result are zero.
 * 'n' is clobbered.
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);            /* clamp below at 0 */
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);            /* clamp above at max */
    tcg_temp_free_vec(t);
}
6930
/* SQSHRUNB: signed saturating shift right unsigned narrow (bottom). */
static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6952
/*
 * SQSHRUNT: as SQSHRUNB, but the clamped value is shifted into the top
 * halves and merged with d's preserved bottom halves via bitsel.
 * On the final merge, t still holds the low-half mask.  'n' is clobbered.
 */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
6968
/* SQSHRUNT: signed saturating shift right unsigned narrow (top). */
static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
6994
6995static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
6996{
6997 static const GVecGen2i ops[3] = {
6998 { .fno = gen_helper_sve2_sqrshrunb_h },
6999 { .fno = gen_helper_sve2_sqrshrunb_s },
7000 { .fno = gen_helper_sve2_sqrshrunb_d },
7001 };
7002 return do_sve2_shr_narrow(s, a, ops);
7003}
7004
7005static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
7006{
7007 static const GVecGen2i ops[3] = {
7008 { .fno = gen_helper_sve2_sqrshrunt_h },
7009 { .fno = gen_helper_sve2_sqrshrunt_s },
7010 { .fno = gen_helper_sve2_sqrshrunt_d },
7011 };
7012 return do_sve2_shr_narrow(s, a, ops);
c13418da
RH
7013}
7014
743bb147
RH
/*
 * SQSHRNB: signed arithmetic shift right, clamp to the signed range of
 * the narrow element [min, max], then mask to the low half (the mask
 * also discards the sign-extension bits of negative results).
 * 'n' is clobbered.
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7032
/* SQSHRNB: signed saturating shift right narrow (bottom). */
static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7054
/*
 * SQSHRNT: as SQSHRNB, but the saturated value is shifted into the top
 * halves and merged with d's preserved bottom halves via bitsel.
 * 'n' is clobbered.
 */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7073
/* SQSHRNT: signed saturating shift right narrow (top). */
static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7099
7100static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
7101{
7102 static const GVecGen2i ops[3] = {
7103 { .fno = gen_helper_sve2_sqrshrnb_h },
7104 { .fno = gen_helper_sve2_sqrshrnb_s },
7105 { .fno = gen_helper_sve2_sqrshrnb_d },
7106 };
7107 return do_sve2_shr_narrow(s, a, ops);
7108}
7109
7110static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
7111{
7112 static const GVecGen2i ops[3] = {
7113 { .fno = gen_helper_sve2_sqrshrnt_h },
7114 { .fno = gen_helper_sve2_sqrshrnt_s },
7115 { .fno = gen_helper_sve2_sqrshrnt_d },
7116 };
7117 return do_sve2_shr_narrow(s, a, ops);
7118}
7119
c13418da
RH
/*
 * UQSHRNB: unsigned shift right, clamp above at the narrow-element max.
 * No lower clamp is needed for unsigned values.  'n' is clobbered.
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7131
/* UQSHRNB: unsigned saturating shift right narrow (bottom). */
static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7153
/*
 * UQSHRNT: as UQSHRNB, but the clamped value is shifted into the top
 * halves and merged with d's preserved bottom halves via bitsel.
 * 'n' is clobbered.
 */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}
7167
/* UQSHRNT: unsigned saturating shift right narrow (top). */
static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7192
7193static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
7194{
7195 static const GVecGen2i ops[3] = {
7196 { .fno = gen_helper_sve2_uqrshrnb_h },
7197 { .fno = gen_helper_sve2_uqrshrnb_s },
7198 { .fno = gen_helper_sve2_uqrshrnb_d },
7199 };
7200 return do_sve2_shr_narrow(s, a, ops);
7201}
7202
7203static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
7204{
7205 static const GVecGen2i ops[3] = {
7206 { .fno = gen_helper_sve2_uqrshrnt_h },
7207 { .fno = gen_helper_sve2_uqrshrnt_s },
7208 { .fno = gen_helper_sve2_uqrshrnt_d },
7209 };
7210 return do_sve2_shr_narrow(s, a, ops);
5ff2838d 7211}
b87dbeeb 7212
/*
 * SVE2 narrowing add/sub returning the high half: ADDHN/SUBHN and the
 * rounding RADDHN/RSUBHN variants, bottom (B) and top (T) forms.
 * Helper tables are indexed by the narrow element size; the NULL entry
 * rejects the (nonexistent) byte form.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = { \
        NULL, gen_helper_sve2_##name##_h, \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
    }; \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
               name##_fns[a->esz], a, 0)

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
ef75309b
RH
/* MATCH: byte and halfword only; wider sizes are invalid (NULL). */
static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
e0ae6ec3 7235
ef75309b
RH
/* NMATCH: byte and halfword only; wider sizes are invalid (NULL). */
static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
e0ae6ec3 7240
5880bdc0
RH
/* HISTCNT: word and doubleword only; smaller sizes are invalid (NULL). */
static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
           histcnt_fns[a->esz], a, 0)
7d47ac94 7246
bd394cf5
RH
/* HISTSEG operates on bytes only; other sizes are rejected via NULL. */
TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
           a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7d47ac94 7249
b87dbeeb
SL
7250static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
7251 gen_helper_gvec_4_ptr *fn)
7252{
7253 if (!dc_isar_feature(aa64_sve2, s)) {
7254 return false;
7255 }
7256 return do_zpzz_fp(s, a, fn);
7257}
7258
/*
 * SVE2 floating-point pairwise operations.  Half/single/double only;
 * the NULL entry rejects the byte encoding.
 */
#define DO_SVE2_ZPZZ_FP(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve2_##name##_zpzz_h, \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d \
    }; \
    return do_sve2_zpzz_fp(s, a, fns[a->esz]); \
}

DO_SVE2_ZPZZ_FP(FADDP, faddp)
DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
DO_SVE2_ZPZZ_FP(FMINP, fminp)
bfc9307e
RH
7274
/*
 * SVE Integer Multiply-Add (unpredicated)
 */

/* FMMLA: matrix multiply-accumulate, gated on F32MM / F64MM. */
TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
           a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
           a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
4f26756b 7283
eeb4e84d
RH
/*
 * SQDMLAL/SQDMLSL: signed saturating doubling multiply-add/sub long.
 * The trailing data argument selects the operand halves; the same
 * helper decodes 0 (B), 3 (T) and 2 (BT) forms.
 */
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

/* SQRDMLAH: signed saturating rounding doubling multiply-add high. */
static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)
45a32e80 7312
eeb4e84d
RH
/* SQRDMLSH: signed saturating rounding doubling multiply-subtract high. */
static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)
45a32e80 7319
eeb4e84d
RH
/*
 * Widening integer multiply-add/sub long: S/U MLAL/MLSL B/T.
 * The trailing data argument selects bottom (0) or top (1) halves.
 */
static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)
d782d3ca 7355
5f425b92
RH
/* CMLA: complex multiply-add; the rotation is passed as helper data. */
static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
21068f39 7362
5f425b92
RH
/* CDOT: complex dot product; word/doubleword accumulators only. */
static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
d782d3ca 7368
5f425b92
RH
/* SQRDCMLAH: saturating rounding doubling complex multiply-add high. */
static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
6a98cb2a 7375
8740d694
RH
/* USDOT: mixed-sign dot product (I8MM); only the word encoding is valid. */
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
b2bcd1be 7378
0ea3cdbf
RH
/* AESMC/AESIMC share one helper; 'decrypt' selects the inverse columns. */
TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
           gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
3cc7a88e 7381
32e2ad65
RH
/* AESE/AESD share the aese helper; the final argument selects decrypt. */
TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, false)
TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, true)
3cc7a88e 7386
32e2ad65
RH
/* SM4 block cipher round and key-schedule instructions (SVE2-SM4). */
TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4ekey, a, 0)
3358eb3f 7391
2aa469ff 7392TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
5c1b7226
RH
7393
7394static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
7395{
7396 if (!dc_isar_feature(aa64_sve2, s)) {
7397 return false;
7398 }
7399 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
7400}
7401
d29b17ca
RH
7402static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
7403{
7404 if (!dc_isar_feature(aa64_sve_bf16, s)) {
7405 return false;
7406 }
7407 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
7408}
7409
5c1b7226
RH
7410static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
7411{
7412 if (!dc_isar_feature(aa64_sve2, s)) {
7413 return false;
7414 }
7415 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
7416}
83c2523f
SL
7417
7418static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
7419{
7420 if (!dc_isar_feature(aa64_sve2, s)) {
7421 return false;
7422 }
7423 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
7424}
7425
7426static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
7427{
7428 if (!dc_isar_feature(aa64_sve2, s)) {
7429 return false;
7430 }
7431 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
7432}
95365277
SL
7433
7434static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
7435{
7436 if (!dc_isar_feature(aa64_sve2, s)) {
7437 return false;
7438 }
7439 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
7440}
7441
7442static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
7443{
7444 if (!dc_isar_feature(aa64_sve2, s)) {
7445 return false;
7446 }
7447 return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
7448}
631be02e
SL
7449
/* FLOGB: floating-point exponent extraction; no byte form (NULL). */
static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        NULL, gen_helper_flogb_h,
        gen_helper_flogb_s, gen_helper_flogb_d
    };

    if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Half-precision ops use the FP16-specific status word. */
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
50d102bd
SL
7473
/*
 * FMLALB/T, FMLSLB/T (vectors): half-to-single multiply-add long.
 * 'sub' and 'sel' (top-half select) are packed into the helper's
 * immediate data argument.
 */
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, cpu_env);
}
7483
/* Expand the four bottom/top add/subtract combinations. */
static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, false);
}

static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, true);
}

static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, false);
}

static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, true);
}
7503
/*
 * Indexed FMLALB/T, FMLSLB/T: as do_FMLAL_zzzw, with the element index
 * packed above the sel/sub bits of the helper data argument.
 */
static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 2) | (sel << 1) | sub, cpu_env);
}
7513
/* Expand the four indexed bottom/top add/subtract combinations. */
static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, false);
}

static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, true);
}

static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, false);
}

static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, true);
}
2323c5ff 7533
eec05e4e
RH
/* Integer matrix multiply-accumulate (I8MM), byte inputs only. */
TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_ummla_b, a, 0)
cb8657f7 7540
eec05e4e
RH
/* BFDOT: bfloat16 dot product, vector and indexed forms (BF16 feature). */
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)
81266a1f 7545
eec05e4e
RH
/* BFMMLA: bfloat16 matrix multiply-accumulate into single precision. */
TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfmmla, a, 0)
5693887f
RH
7548
/*
 * BFMLALB/T (vectors): bfloat16 multiply-add long into single precision.
 * 'sel' chooses bottom (false) or top (true) source halves.
 */
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)
458d0ab6
RH
7557
/*
 * Indexed BFMLALB/T: as do_BFMLAL_zzzw, with the element index packed
 * above the sel bit of the helper data argument.
 */
static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel, FPST_FPCR)
}